From fb34b1674bd85ca8a0f0eaacd0af09599c178044 Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Fri, 25 Mar 2022 13:34:39 -0600 Subject: [PATCH] improve handling _ prefix added to library functions as compile/link artifact (#924) --- CHANGELOG.md | 1 + capa/features/extractors/ida/file.py | 6 ++++++ capa/features/extractors/ida/insn.py | 6 ++++++ capa/features/extractors/viv/file.py | 6 ++++++ capa/features/extractors/viv/insn.py | 6 ++++++ 5 files changed, 25 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index fb3b18710..9833732b8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ - ### Bug Fixes +- improve handling _ prefix compile/link artifact #924 @mike-hunhoff ### capa explorer IDA Pro plugin - improve file format extraction #918 @mike-hunhoff diff --git a/capa/features/extractors/ida/file.py b/capa/features/extractors/ida/file.py index d938e3937..29fe77375 100644 --- a/capa/features/extractors/ida/file.py +++ b/capa/features/extractors/ida/file.py @@ -152,6 +152,12 @@ def extract_file_function_names(): if idaapi.get_func(ea).flags & idaapi.FUNC_LIB: name = idaapi.get_name(ea) yield FunctionName(name), ea + if name.startswith("_"): + # some linkers may prefix linked routines with a `_` to avoid name collisions. + # extract features for both the mangled and un-mangled representations. + # e.g. `_fwrite` -> `fwrite` + # see: https://stackoverflow.com/a/2628384/87207 + yield FunctionName(name[1:]), ea def extract_file_format(): diff --git a/capa/features/extractors/ida/insn.py b/capa/features/extractors/ida/insn.py index 4b56bb50e..c8ccb2ee1 100644 --- a/capa/features/extractors/ida/insn.py +++ b/capa/features/extractors/ida/insn.py @@ -114,6 +114,12 @@ def extract_insn_api_features(f, bb, insn): if target_func.flags & idaapi.FUNC_LIB: name = idaapi.get_name(target_func.start_ea) yield API(name), insn.ea + if name.startswith("_"): + # some linkers may prefix linked routines with a `_` to avoid name collisions. + # extract features for both the mangled and un-mangled representations. + # e.g. `_fwrite` -> `fwrite` + # see: https://stackoverflow.com/a/2628384/87207 + yield API(name[1:]), insn.ea def extract_insn_number_features(f, bb, insn): diff --git a/capa/features/extractors/viv/file.py b/capa/features/extractors/viv/file.py index db4b8b5e0..718e014e5 100644 --- a/capa/features/extractors/viv/file.py +++ b/capa/features/extractors/viv/file.py @@ -79,6 +79,12 @@ def extract_file_function_names(vw, **kwargs): if viv_utils.flirt.is_library_function(vw, va): name = viv_utils.get_function_name(vw, va) yield FunctionName(name), va + if name.startswith("_"): + # some linkers may prefix linked routines with a `_` to avoid name collisions. + # extract features for both the mangled and un-mangled representations. + # e.g. `_fwrite` -> `fwrite` + # see: https://stackoverflow.com/a/2628384/87207 + yield FunctionName(name[1:]), va def extract_file_format(buf, **kwargs): diff --git a/capa/features/extractors/viv/insn.py b/capa/features/extractors/viv/insn.py index a18ff986f..921f7c694 100644 --- a/capa/features/extractors/viv/insn.py +++ b/capa/features/extractors/viv/insn.py @@ -119,6 +119,12 @@ def extract_insn_api_features(f, bb, insn): if viv_utils.flirt.is_library_function(f.vw, target): name = viv_utils.get_function_name(f.vw, target) yield API(name), insn.va + if name.startswith("_"): + # some linkers may prefix linked routines with a `_` to avoid name collisions. + # extract features for both the mangled and un-mangled representations. + # e.g. `_fwrite` -> `fwrite` + # see: https://stackoverflow.com/a/2628384/87207 + yield API(name[1:]), insn.va return for _ in range(THUNK_CHAIN_DEPTH_DELTA):