Skip to content

Commit

Permalink
Merge pull request #1792 from xusheng6/binja_symtab
Browse files Browse the repository at this point in the history
binja: add support for symtab names. Fix #1504
  • Loading branch information
williballenthin authored Sep 21, 2023
2 parents 7abcf3d + 55af6f0 commit 5710670
Show file tree
Hide file tree
Showing 5 changed files with 61 additions and 32 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
### New Features
- ghidra: add Ghidra feature extractor and supporting code #1770 @colton-gabertan
- ghidra: add entry script helping users run capa against a loaded Ghidra database #1767 @mike-hunhoff
- binja: add support for symtab names #1504 @xusheng6

### Breaking Changes

Expand Down
20 changes: 11 additions & 9 deletions capa/features/extractors/binja/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,15 +125,17 @@ def extract_file_function_names(bv: BinaryView) -> Iterator[Tuple[Feature, Addre
"""
for sym_name in bv.symbols:
for sym in bv.symbols[sym_name]:
if sym.type == SymbolType.LibraryFunctionSymbol:
name = sym.short_name
yield FunctionName(name), sym.address
if name.startswith("_"):
# some linkers may prefix linked routines with a `_` to avoid name collisions.
# extract features for both the mangled and un-mangled representations.
# e.g. `_fwrite` -> `fwrite`
# see: https://stackoverflow.com/a/2628384/87207
yield FunctionName(name[1:]), sym.address
if sym.type not in [SymbolType.LibraryFunctionSymbol, SymbolType.FunctionSymbol]:
continue

name = sym.short_name
yield FunctionName(name), sym.address
if name.startswith("_"):
# some linkers may prefix linked routines with a `_` to avoid name collisions.
# extract features for both the mangled and un-mangled representations.
# e.g. `_fwrite` -> `fwrite`
# see: https://stackoverflow.com/a/2628384/87207
yield FunctionName(name[1:]), sym.address


def extract_file_format(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
Expand Down
26 changes: 24 additions & 2 deletions capa/features/extractors/binja/function.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@
# See the License for the specific language governing permissions and limitations under the License.
from typing import Tuple, Iterator

from binaryninja import Function, BinaryView, RegisterValueType, LowLevelILOperation
from binaryninja import Function, BinaryView, SymbolType, RegisterValueType, LowLevelILOperation

from capa.features.file import FunctionName
from capa.features.common import Feature, Characteristic
from capa.features.address import Address, AbsoluteVirtualAddress
from capa.features.extractors import loops
Expand Down Expand Up @@ -73,10 +74,31 @@ def extract_recursive_call(fh: FunctionHandle):
yield Characteristic("recursive call"), fh.address


def extract_function_name(fh: FunctionHandle):
"""extract function names (e.g., symtab names)"""
func: Function = fh.inner
bv: BinaryView = func.view
if bv is None:
return

for sym in bv.get_symbols(func.start):
if sym.type not in [SymbolType.LibraryFunctionSymbol, SymbolType.FunctionSymbol]:
continue

name = sym.short_name
yield FunctionName(name), sym.address
if name.startswith("_"):
# some linkers may prefix linked routines with a `_` to avoid name collisions.
# extract features for both the mangled and un-mangled representations.
# e.g. `_fwrite` -> `fwrite`
# see: https://stackoverflow.com/a/2628384/87207
yield FunctionName(name[1:]), sym.address


def extract_features(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
for func_handler in FUNCTION_HANDLERS:
for feature, addr in func_handler(fh):
yield feature, addr


FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_loop, extract_recursive_call)
FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_loop, extract_recursive_call, extract_function_name)
44 changes: 24 additions & 20 deletions capa/features/extractors/binja/insn.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,28 +94,32 @@ def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle)
candidate_addrs.append(stub_addr)

for address in candidate_addrs:
sym = func.view.get_symbol_at(address)
if sym is None or sym.type not in [SymbolType.ImportAddressSymbol, SymbolType.ImportedFunctionSymbol]:
continue

sym_name = sym.short_name

lib_name = ""
import_lib = bv.lookup_imported_object_library(sym.address)
if import_lib is not None:
lib_name = import_lib[0].name
if lib_name.endswith(".dll"):
lib_name = lib_name[:-4]
elif lib_name.endswith(".so"):
lib_name = lib_name[:-3]

for name in capa.features.extractors.helpers.generate_symbols(lib_name, sym_name):
yield API(name), ih.address

if sym_name.startswith("_"):
for name in capa.features.extractors.helpers.generate_symbols(lib_name, sym_name[1:]):
for sym in func.view.get_symbols(address):
if sym is None or sym.type not in [
SymbolType.ImportAddressSymbol,
SymbolType.ImportedFunctionSymbol,
SymbolType.FunctionSymbol,
]:
continue

sym_name = sym.short_name

lib_name = ""
import_lib = bv.lookup_imported_object_library(sym.address)
if import_lib is not None:
lib_name = import_lib[0].name
if lib_name.endswith(".dll"):
lib_name = lib_name[:-4]
elif lib_name.endswith(".so"):
lib_name = lib_name[:-3]

for name in capa.features.extractors.helpers.generate_symbols(lib_name, sym_name):
yield API(name), ih.address

if sym_name.startswith("_"):
for name in capa.features.extractors.helpers.generate_symbols(lib_name, sym_name[1:]):
yield API(name), ih.address


def extract_insn_number_features(
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
Expand Down
2 changes: 1 addition & 1 deletion tests/test_binja_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
@pytest.mark.skipif(binja_present is False, reason="Skip binja tests if the binaryninja Python API is not installed")
@fixtures.parametrize(
"sample,scope,feature,expected",
fixtures.FEATURE_PRESENCE_TESTS,
fixtures.FEATURE_PRESENCE_TESTS + fixtures.FEATURE_SYMTAB_FUNC_TESTS,
indirect=["sample", "scope"],
)
def test_binja_features(sample, scope, feature, expected):
Expand Down

0 comments on commit 5710670

Please sign in to comment.