-
Notifications
You must be signed in to change notification settings - Fork 569
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Display analysis information #2134
base: master
Are you sure you want to change the base?
Changes from all commits
ef846fc
76df545
64565da
5d75052
04c93dc
3a1504a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,6 +21,8 @@ | |
from capa.rules import Scope, RuleSet | ||
from capa.engine import FeatureSet, MatchResults | ||
from capa.helpers import redirecting_print_to_tqdm | ||
from capa.features.file import Import | ||
from capa.features.insn import API | ||
from capa.capabilities.common import find_file_capabilities | ||
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, StaticFeatureExtractor | ||
|
||
|
@@ -96,7 +98,7 @@ def find_basic_block_capabilities( | |
|
||
def find_code_capabilities( | ||
ruleset: RuleSet, extractor: StaticFeatureExtractor, fh: FunctionHandle | ||
) -> Tuple[MatchResults, MatchResults, MatchResults, int]: | ||
) -> Tuple[MatchResults, MatchResults, MatchResults, FeatureSet]: | ||
""" | ||
find matches for the given rules within the given function. | ||
|
||
|
@@ -129,7 +131,7 @@ def find_code_capabilities( | |
function_features[feature].add(va) | ||
|
||
_, function_matches = ruleset.match(Scope.FUNCTION, function_features, fh.address) | ||
return function_matches, bb_matches, insn_matches, len(function_features) | ||
return function_matches, bb_matches, insn_matches, function_features | ||
|
||
|
||
def find_static_capabilities( | ||
|
@@ -141,6 +143,8 @@ def find_static_capabilities( | |
|
||
feature_counts = rdoc.StaticFeatureCounts(file=0, functions=()) | ||
library_functions: Tuple[rdoc.LibraryFunction, ...] = () | ||
apicall_count: int = 0 | ||
import_count: int = 0 | ||
|
||
assert isinstance(extractor, StaticFeatureExtractor) | ||
with redirecting_print_to_tqdm(disable_progress): | ||
|
@@ -180,12 +184,18 @@ def pbar(s, *args, **kwargs): | |
pb.set_postfix_str(f"skipped {n_libs} library functions ({percentage}%)") | ||
continue | ||
|
||
function_matches, bb_matches, insn_matches, feature_count = find_code_capabilities( | ||
function_matches, bb_matches, insn_matches, function_features = find_code_capabilities( | ||
ruleset, extractor, f | ||
) | ||
feature_count = len(function_features) | ||
feature_counts.functions += ( | ||
rdoc.FunctionFeatureCount(address=frz.Address.from_capa(f.address), count=feature_count), | ||
williballenthin marked this conversation as resolved.
Show resolved
Hide resolved
|
||
) | ||
# cumulatively count the total number of API calls | ||
for feature, vas in function_features.items(): | ||
if isinstance(feature, API): | ||
apicall_count += len(vas) | ||
|
||
t1 = time.time() | ||
|
||
match_count = 0 | ||
|
@@ -223,9 +233,15 @@ def pbar(s, *args, **kwargs): | |
rule = ruleset[rule_name] | ||
capa.engine.index_rule_matches(function_and_lower_features, rule, locations) | ||
|
||
all_file_matches, feature_count = find_file_capabilities(ruleset, extractor, function_and_lower_features) | ||
all_file_matches, file_features = find_file_capabilities(ruleset, extractor, function_and_lower_features) | ||
feature_count = len(file_features) | ||
feature_counts.file = feature_count | ||
|
||
# cumulatively count the total number of Import features | ||
for feature, _ in file_features.items(): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. use |
||
if isinstance(feature, Import): | ||
import_count += 1 | ||
|
||
matches: MatchResults = dict( | ||
itertools.chain( | ||
# each rule exists in exactly one scope, | ||
|
@@ -241,6 +257,8 @@ def pbar(s, *args, **kwargs): | |
meta = { | ||
"feature_counts": feature_counts, | ||
"library_functions": library_functions, | ||
"apicall_count": apicall_count, | ||
"import_count": import_count, | ||
} | ||
|
||
return matches, meta |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,6 +19,9 @@ | |
|
||
tabulate.PRESERVE_WHITESPACE = True | ||
|
||
MIN_LIBFUNCS_RATIO = 0.4 | ||
MIN_API_CALLS = 10 | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Where did these numbers come from, and how should I interpret them? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Good explanations! Would you include the key parts here as a comment? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Also, I'm interested to see how frequently this message is shown to users. I don't think our sigs will identify 40% of functions in most binaries, so I'm a little concerned this message will be shown too often. Have you had a chance to collect these stats against a large number of samples? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think it's still helpful information since we know there's most likely more library code than we've identified. |
||
|
||
|
||
def width(s: str, character_count: int) -> str: | ||
"""pad the given string to at least `character_count`""" | ||
|
@@ -29,6 +32,27 @@ def width(s: str, character_count: int) -> str: | |
|
||
|
||
def render_meta(doc: rd.ResultDocument, ostream: StringIO): | ||
if isinstance(doc.meta.analysis, rd.StaticAnalysis): | ||
|
||
if doc.meta.analysis.apicall_count < MIN_API_CALLS: | ||
ostream.write( | ||
rutils.bold( | ||
"The analyzed sample reports very few API calls, this could indicate that it is packed, encrypted, corrupted, or tiny\n" | ||
) | ||
) | ||
|
||
n_libs: int = len(doc.meta.analysis.library_functions) | ||
n_funcs: int = len(doc.meta.analysis.feature_counts.functions) | ||
lib_ratio: float = n_libs / (n_funcs + n_libs) if (n_funcs + n_libs) > 0 else 0 | ||
|
||
if lib_ratio < MIN_LIBFUNCS_RATIO: | ||
ostream.write( | ||
rutils.bold( | ||
"Few library functions (%.2f%% of all functions) recognized by FLIRT signatures, results may contain false positives\n" | ||
) | ||
% (100 * lib_ratio) | ||
) | ||
|
||
rows = [ | ||
(width("md5", 22), width(doc.meta.sample.md5, 82)), | ||
("sha1", doc.meta.sample.sha1), | ||
|
Large diffs are not rendered by default.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
changing the signature of a function is a breaking change, so this should wait until the next major release.