diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5fe6ae2b2..d17792122 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,7 @@
 # Change Log
 
 ## master (unreleased)
+- extract function and API names from ELF symtab entries @yelhamer https://github.com/mandiant/capa-rules/issues/736
 
 ### New Features
 - Utility script to detect feature overlap between new and existing CAPA rules [#1451](https://github.com/mandiant/capa/issues/1451) [@Aayush-Goel-04](https://github.com/aayush-goel-04)
diff --git a/capa/features/extractors/elf.py b/capa/features/extractors/elf.py
index 0fedb49b5..1f23d53e3 100644
--- a/capa/features/extractors/elf.py
+++ b/capa/features/extractors/elf.py
@@ -91,6 +91,24 @@ class Shdr:
     entsize: int
     buf: bytes
 
+    @classmethod
+    def from_viv(cls, section, buf: bytes) -> "Shdr":
+        """
+        build a Shdr from a section header object exposing `sh_*` attributes,
+        paired with the bytes that the caller read for that section.
+        """
+        return cls(
+            section.sh_name,
+            section.sh_type,
+            section.sh_flags,
+            section.sh_addr,
+            section.sh_offset,
+            section.sh_size,
+            section.sh_link,
+            section.sh_entsize,
+            buf,
+        )
+
 
 class ELF:
     def __init__(self, f: BinaryIO):
@@ -695,6 +713,38 @@ def get_symbols(self) -> Iterator[Symbol]:
         for symbol in self.symbols:
             yield symbol
 
+    @classmethod
+    def from_Elf(cls, ElfBinary) -> Optional["SymTab"]:
+        """
+        build a SymTab from the first SHT_SYMTAB section of a parsed ELF object,
+        such as the `parsedbin` of a vivisect workspace.
+
+        returns None when the binary has no symbol table section.
+        raises CorruptElfFile when the symbol table cannot be parsed.
+        """
+        endian = "<" if ElfBinary.getEndian() == 0 else ">"
+        bitness = ElfBinary.bits
+
+        SHT_SYMTAB = 0x2
+        for section in ElfBinary.sections:
+            # the section kind is stored in sh_type; sh_info carries unrelated,
+            # type-dependent data and must not be tested against SHT_SYMTAB.
+            if section.sh_type == SHT_SYMTAB:
+                strtab_section = ElfBinary.sections[section.sh_link]
+                sh_symtab = Shdr.from_viv(section, ElfBinary.readAtOffset(section.sh_offset, section.sh_size))
+                sh_strtab = Shdr.from_viv(
+                    strtab_section, ElfBinary.readAtOffset(strtab_section.sh_offset, strtab_section.sh_size)
+                )
+
+                try:
+                    return cls(endian, bitness, sh_symtab, sh_strtab)
+                except Exception as e:
+                    # all exceptions that could be encountered by
+                    # cls._parse() imply a faulty symbol table.
+                    raise CorruptElfFile("malformed symbol's table") from e
+
+        return None
+
 
 def guess_os_from_osabi(elf: ELF) -> Optional[OS]:
     return elf.ei_osabi
diff --git a/capa/features/extractors/viv/extractor.py b/capa/features/extractors/viv/extractor.py
index 4d877ab27..16b97ef39 100644
--- a/capa/features/extractors/viv/extractor.py
+++ b/capa/features/extractors/viv/extractor.py
@@ -6,7 +6,7 @@
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
 import logging
-from typing import List, Tuple, Iterator
+from typing import Any, Dict, List, Tuple, Iterator
 
 import viv_utils
 import viv_utils.flirt
@@ -49,8 +49,11 @@ def extract_file_features(self):
         yield from capa.features.extractors.viv.file.extract_features(self.vw, self.buf)
 
     def get_functions(self) -> Iterator[FunctionHandle]:
+        cache: Dict[str, Any] = {}
         for va in sorted(self.vw.getFunctions()):
-            yield FunctionHandle(address=AbsoluteVirtualAddress(va), inner=viv_utils.Function(self.vw, va))
+            yield FunctionHandle(
+                address=AbsoluteVirtualAddress(va), inner=viv_utils.Function(self.vw, va), ctx={"cache": cache}
+            )
 
     def extract_function_features(self, fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
         yield from capa.features.extractors.viv.function.extract_features(fh)
diff --git a/capa/features/extractors/viv/function.py b/capa/features/extractors/viv/function.py
index 50d5792eb..87403a65d 100644
--- a/capa/features/extractors/viv/function.py
+++ b/capa/features/extractors/viv/function.py
@@ -11,9 +11,11 @@
 import viv_utils
 import vivisect.const
 
+from capa.features.file import FunctionName
 from capa.features.common import Feature, Characteristic
 from capa.features.address import Address, AbsoluteVirtualAddress
 from capa.features.extractors import loops
+from capa.features.extractors.elf import SymTab
 from capa.features.extractors.base_extractor import FunctionHandle
 
 
@@ -30,6 +32,35 @@ def interface_extract_function_XXX(fh: FunctionHandle) -> Iterator[Tuple[Feature
     raise NotImplementedError
 
 
+def extract_function_symtab_names(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
+    """
+    yield a FunctionName feature for each function symbol in the ELF symbol table
+    whose value equals the address of the given function.
+    """
+    if fh.inner.vw.metadata["Format"] == "elf":
+        # the parsed symbol table is kept in the cache dict carried by the function handle's context,
+        # so that it does not need to be re-parsed for every function.
+        if "symtab" not in fh.ctx["cache"]:
+            try:
+                fh.ctx["cache"]["symtab"] = SymTab.from_Elf(fh.inner.vw.parsedbin)
+            except Exception:
+                # best effort: without a usable symbol table there are no names to report.
+                fh.ctx["cache"]["symtab"] = None
+
+        symtab = fh.ctx["cache"]["symtab"]
+        if symtab:
+            for symbol in symtab.get_symbols():
+                sym_name = symtab.get_name(symbol)
+                sym_value = symbol.value
+                sym_info = symbol.info
+
+                # the ELF symbol type is the low nibble of st_info, so compare the extracted type
+                # rather than testing a single bit. NOTE(review): assumes symbol.info is the raw st_info byte.
+                STT_FUNC = 0x2
+                if sym_value == fh.address and (sym_info & 0xF) == STT_FUNC:
+                    yield FunctionName(sym_name), fh.address
+
+
 def extract_function_calls_to(fhandle: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
     f: viv_utils.Function = fhandle.inner
     for src, _, _, _ in f.vw.getXrefsTo(f.va, rtype=vivisect.const.REF_CODE):
@@ -79,4 +110,8 @@ def extract_features(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
             yield feature, addr
 
 
-FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_loop)
+FUNCTION_HANDLERS = (
+    extract_function_symtab_names,
+    extract_function_calls_to,
+    extract_function_loop,
+)
diff --git a/capa/features/extractors/viv/insn.py b/capa/features/extractors/viv/insn.py
index d324f31e2..d8d6edbe0 100644
--- a/capa/features/extractors/viv/insn.py
+++ b/capa/features/extractors/viv/insn.py
@@ -19,9 +19,10 @@
 
 import capa.features.extractors.helpers
 import capa.features.extractors.viv.helpers
 from capa.features.insn import API, MAX_STRUCTURE_SIZE, Number, Offset, Mnemonic, OperandNumber, OperandOffset
 from capa.features.common import MAX_BYTES_FEATURE_SIZE, THUNK_CHAIN_DEPTH_DELTA, Bytes, String, Feature, Characteristic
 from capa.features.address import Address, AbsoluteVirtualAddress
+from capa.features.extractors.elf import SymTab
 from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle
 from capa.features.extractors.viv.indirect_calls import NotFoundError, resolve_indirect_call
 
@@ -109,6 +110,29 @@ def extract_insn_api_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterato
         if not target:
             return
 
+        if f.vw.metadata["Format"] == "elf":
+            if "symtab" not in fh.ctx["cache"]:
+                # the parsed symbol table is kept in the cache dict carried by the function handle's context
+                # in order to avoid re-parsing it every time a call is inspected.
+                try:
+                    fh.ctx["cache"]["symtab"] = SymTab.from_Elf(f.vw.parsedbin)
+                except Exception:
+                    # best effort: without a usable symbol table there are no names to report.
+                    fh.ctx["cache"]["symtab"] = None
+
+            symtab = fh.ctx["cache"]["symtab"]
+            if symtab:
+                for symbol in symtab.get_symbols():
+                    sym_name = symtab.get_name(symbol)
+                    sym_value = symbol.value
+                    sym_info = symbol.info
+
+                    # the ELF symbol type is the low nibble of st_info, so compare the extracted type
+                    # rather than testing a single bit. NOTE(review): assumes symbol.info is the raw st_info byte.
+                    STT_FUNC = 0x2
+                    if sym_value == target and (sym_info & 0xF) == STT_FUNC:
+                        yield API(sym_name), ih.address
+
         if viv_utils.flirt.is_library_function(f.vw, target):
             name = viv_utils.get_function_name(f.vw, target)
             yield API(name), ih.address
diff --git a/tests/fixtures.py b/tests/fixtures.py
index 04c9c53bb..84e40209a 100644
--- a/tests/fixtures.py
+++ b/tests/fixtures.py
@@ -761,6 +761,47 @@ def parametrize(params, values, **kwargs):
     key=lambda t: (t[0], t[1]),
 )
 
+# this list should be merged into the one above (FEATURE_PRESENCE_TESTS)
+# once the debug symbol functionality has been added to all backends
+FEATURE_SYMTAB_FUNC_TESTS = [
+    (
+        "2bf18d",
+        "function=0x4027b3,bb=0x402861,insn=0x40286d",
+        capa.features.insn.API("__GI_connect"),
+        True,
+    ),
+    (
+        "2bf18d",
+        "function=0x4027b3,bb=0x402861,insn=0x40286d",
+        capa.features.insn.API("connect"),
+        True,
+    ),
+    (
+        "2bf18d",
+        "function=0x4027b3,bb=0x402861,insn=0x40286d",
+        capa.features.insn.API("__libc_connect"),
+        True,
+    ),
+    (
+        "2bf18d",
+        "function=0x4088a4",
+        capa.features.file.FunctionName("__GI_connect"),
+        True,
+    ),
+    (
+        "2bf18d",
+        "function=0x4088a4",
+        capa.features.file.FunctionName("connect"),
+        True,
+    ),
+    (
+        "2bf18d",
+        "function=0x4088a4",
+        capa.features.file.FunctionName("__libc_connect"),
+        True,
+    ),
+]
+
 FEATURE_PRESENCE_TESTS_DOTNET = sorted(
     [
         ("b9f5b", "file", Arch(ARCH_I386), True),
diff --git a/tests/test_viv_features.py b/tests/test_viv_features.py
index fcf49c848..58ce5ace4 100644
--- a/tests/test_viv_features.py
+++ b/tests/test_viv_features.py
@@ -11,7 +11,7 @@
 
 @fixtures.parametrize(
     "sample,scope,feature,expected",
-    fixtures.FEATURE_PRESENCE_TESTS,
+    fixtures.FEATURE_PRESENCE_TESTS + fixtures.FEATURE_SYMTAB_FUNC_TESTS,
     indirect=["sample", "scope"],
 )
 def test_viv_features(sample, scope, feature, expected):