From 55af6f052f6ab9ffc73e4f67a837c2f0d68f077c Mon Sep 17 00:00:00 2001 From: Xusheng Date: Thu, 21 Sep 2023 17:24:42 +0800 Subject: [PATCH] binja: add support for symtab names. Fix #1504 --- CHANGELOG.md | 1 + capa/features/extractors/binja/file.py | 20 +++++----- capa/features/extractors/binja/function.py | 26 ++++++++++++- capa/features/extractors/binja/insn.py | 44 ++++++++++++---------- tests/test_binja_features.py | 2 +- 5 files changed, 61 insertions(+), 32 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fc0d9a50e..020ad3436 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ ### New Features - ghidra: add Ghidra feature extractor and supporting code #1770 @colton-gabertan - ghidra: add entry script helping users run capa against a loaded Ghidra database #1767 @mike-hunhoff +- binja: add support for symtab names #1504 @xusheng6 ### Breaking Changes diff --git a/capa/features/extractors/binja/file.py b/capa/features/extractors/binja/file.py index d46451e77..034b1636a 100644 --- a/capa/features/extractors/binja/file.py +++ b/capa/features/extractors/binja/file.py @@ -125,15 +125,17 @@ def extract_file_function_names(bv: BinaryView) -> Iterator[Tuple[Feature, Addre """ for sym_name in bv.symbols: for sym in bv.symbols[sym_name]: - if sym.type == SymbolType.LibraryFunctionSymbol: - name = sym.short_name - yield FunctionName(name), sym.address - if name.startswith("_"): - # some linkers may prefix linked routines with a `_` to avoid name collisions. - # extract features for both the mangled and un-mangled representations. - # e.g. `_fwrite` -> `fwrite` - # see: https://stackoverflow.com/a/2628384/87207 - yield FunctionName(name[1:]), sym.address + if sym.type not in [SymbolType.LibraryFunctionSymbol, SymbolType.FunctionSymbol]: + continue + + name = sym.short_name + yield FunctionName(name), sym.address + if name.startswith("_"): + # some linkers may prefix linked routines with a `_` to avoid name collisions. + # extract features for both the mangled and un-mangled representations. + # e.g. `_fwrite` -> `fwrite` + # see: https://stackoverflow.com/a/2628384/87207 + yield FunctionName(name[1:]), sym.address def extract_file_format(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]: diff --git a/capa/features/extractors/binja/function.py b/capa/features/extractors/binja/function.py index d2e67aa3a..520de0b3f 100644 --- a/capa/features/extractors/binja/function.py +++ b/capa/features/extractors/binja/function.py @@ -7,8 +7,9 @@ # See the License for the specific language governing permissions and limitations under the License. from typing import Tuple, Iterator -from binaryninja import Function, BinaryView, RegisterValueType, LowLevelILOperation +from binaryninja import Function, BinaryView, SymbolType, RegisterValueType, LowLevelILOperation +from capa.features.file import FunctionName from capa.features.common import Feature, Characteristic from capa.features.address import Address, AbsoluteVirtualAddress from capa.features.extractors import loops @@ -73,10 +74,31 @@ def extract_recursive_call(fh: FunctionHandle): yield Characteristic("recursive call"), fh.address +def extract_function_name(fh: FunctionHandle): + """extract function names (e.g., symtab names)""" + func: Function = fh.inner + bv: BinaryView = func.view + if bv is None: + return + + for sym in bv.get_symbols(func.start): + if sym.type not in [SymbolType.LibraryFunctionSymbol, SymbolType.FunctionSymbol]: + continue + + name = sym.short_name + yield FunctionName(name), sym.address + if name.startswith("_"): + # some linkers may prefix linked routines with a `_` to avoid name collisions. + # extract features for both the mangled and un-mangled representations. + # e.g. `_fwrite` -> `fwrite` + # see: https://stackoverflow.com/a/2628384/87207 + yield FunctionName(name[1:]), sym.address + + def extract_features(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]: for func_handler in FUNCTION_HANDLERS: for feature, addr in func_handler(fh): yield feature, addr -FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_loop, extract_recursive_call) +FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_loop, extract_recursive_call, extract_function_name) diff --git a/capa/features/extractors/binja/insn.py b/capa/features/extractors/binja/insn.py index 3144fd15a..f2b8fefc2 100644 --- a/capa/features/extractors/binja/insn.py +++ b/capa/features/extractors/binja/insn.py @@ -94,28 +94,32 @@ def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) candidate_addrs.append(stub_addr) for address in candidate_addrs: - sym = func.view.get_symbol_at(address) - if sym is None or sym.type not in [SymbolType.ImportAddressSymbol, SymbolType.ImportedFunctionSymbol]: - continue - - sym_name = sym.short_name - - lib_name = "" - import_lib = bv.lookup_imported_object_library(sym.address) - if import_lib is not None: - lib_name = import_lib[0].name - if lib_name.endswith(".dll"): - lib_name = lib_name[:-4] - elif lib_name.endswith(".so"): - lib_name = lib_name[:-3] - - for name in capa.features.extractors.helpers.generate_symbols(lib_name, sym_name): - yield API(name), ih.address - - if sym_name.startswith("_"): - for name in capa.features.extractors.helpers.generate_symbols(lib_name, sym_name[1:]): + for sym in func.view.get_symbols(address): + if sym is None or sym.type not in [ + SymbolType.ImportAddressSymbol, + SymbolType.ImportedFunctionSymbol, + SymbolType.FunctionSymbol, + ]: + continue + + sym_name = sym.short_name + + lib_name = "" + import_lib = bv.lookup_imported_object_library(sym.address) + if import_lib is not None: + lib_name = import_lib[0].name + if lib_name.endswith(".dll"): + lib_name = lib_name[:-4] + elif lib_name.endswith(".so"): + lib_name = lib_name[:-3] + + for name in capa.features.extractors.helpers.generate_symbols(lib_name, sym_name): yield API(name), ih.address + if sym_name.startswith("_"): + for name in capa.features.extractors.helpers.generate_symbols(lib_name, sym_name[1:]): + yield API(name), ih.address + def extract_insn_number_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle diff --git a/tests/test_binja_features.py b/tests/test_binja_features.py index fdb7ff88b..3d51886d4 100644 --- a/tests/test_binja_features.py +++ b/tests/test_binja_features.py @@ -36,7 +36,7 @@ @pytest.mark.skipif(binja_present is False, reason="Skip binja tests if the binaryninja Python API is not installed") @fixtures.parametrize( "sample,scope,feature,expected", - fixtures.FEATURE_PRESENCE_TESTS, + fixtures.FEATURE_PRESENCE_TESTS + fixtures.FEATURE_SYMTAB_FUNC_TESTS, indirect=["sample", "scope"], ) def test_binja_features(sample, scope, feature, expected):