Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix 130 #813

Merged
merged 8 commits into from
Oct 27, 2021
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -114,3 +114,4 @@ venv.bak/
isort-output.log
black-output.log
rule-linter-output.log
.vscode
7 changes: 7 additions & 0 deletions capa/ida/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,13 @@ def collect_metadata():
"format": idaapi.get_file_type_name(),
"extractor": "ida",
"base_address": idaapi.get_imagebase(),
"layout": {
# this is updated after capabilities have been collected.
# will look like:
#
# "functions": { 0x401000: { "matched_basic_blocks": [ 0x401000, 0x401005, ... ] }, ... }
},

},
"version": capa.version.__version__,
}
Expand Down
1 change: 1 addition & 0 deletions capa/ida/plugin/form.py
Original file line number Diff line number Diff line change
Expand Up @@ -751,6 +751,7 @@ def slot_progress_feature_extraction(text):
meta = capa.ida.helpers.collect_metadata()
capabilities, counts = capa.main.find_capabilities(self.ruleset_cache, extractor, disable_progress=True)
meta["analysis"].update(counts)
meta["analysis"]["layout"] = capa.main.compute_layout(self.ruleset_cache, extractor, capabilities)
except UserCancelledError:
logger.info("User cancelled analysis.")
return False
Expand Down
47 changes: 47 additions & 0 deletions capa/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -582,10 +582,56 @@ def collect_metadata(argv, sample_path, rules_path, extractor):
"extractor": extractor.__class__.__name__,
"rules": rules_path,
"base_address": extractor.get_base_address(),
"layout": {
# this is updated after capabilities have been collected.
# will look like:
#
# "functions": { 0x401000: { "matched_basic_blocks": [ 0x401000, 0x401005, ... ] }, ... }
},
},
}


def compute_layout(rules, extractor, capabilities):
    """
    build the "layout" metadata that records, for each function,
    which of its basic blocks had a basic block-scoped rule match.

    every function yielded by the extractor gets an entry,
    but only basic blocks at which some rule matched are listed;
    listing every basic block could bloat the json document
    with a large amount of data that nothing refers to.

    args:
      rules: mapping-like object indexed by rule name; each rule exposes a `meta` dict.
      extractor: object providing `get_functions()` and `get_basic_blocks(function)`.
      capabilities: dict from rule name to an iterable of (address, match) pairs.

    returns:
      dict shaped like:
        { "functions": { 0x401000: { "matched_basic_blocks": [ 0x401000, 0x401005, ... ] }, ... } }
    """
    # index the program: function -> its basic blocks, and basic block -> its function.
    blocks_by_function = {}
    function_by_block = {}
    for function in extractor.get_functions():
        function_va = int(function)
        block_vas = []
        blocks_by_function[function_va] = block_vas
        for block in extractor.get_basic_blocks(function):
            block_va = int(block)
            function_by_block[block_va] = function_va
            block_vas.append(block_va)

    # gather the addresses of all matches from basic block-scoped rules.
    matched = set()
    for rule_name, matches in capabilities.items():
        if rules[rule_name].meta.get("scope") != capa.rules.BASIC_BLOCK_SCOPE:
            continue

        for addr, _ in matches:
            # a basic block match must land on a basic block the extractor reported.
            assert addr in function_by_block
            matched.add(addr)

    functions = {}
    for function_va, block_vas in blocks_by_function.items():
        # this per-function object is open to extension in the future,
        # such as with the function name, etc.
        functions[function_va] = {
            "matched_basic_blocks": [va for va in block_vas if va in matched],
        }

    return {"functions": functions}



def install_common_args(parser, wanted=None):
"""
register a common set of command line arguments for re-use by main & scripts.
Expand Down Expand Up @@ -948,6 +994,7 @@ def main(argv=None):

capabilities, counts = find_capabilities(rules, extractor, disable_progress=args.quiet)
meta["analysis"].update(counts)
meta["analysis"]["layout"] = compute_layout(rules, extractor, capabilities)

if has_file_limitation(rules, capabilities):
# bail if capa encountered file limitation e.g. a packed binary
Expand Down
12 changes: 11 additions & 1 deletion capa/render/vverbose.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,11 @@ def render_rules(ostream, doc):
api: kernel32.GetLastError @ 0x10004A87
api: kernel32.OutputDebugString @ 0x10004767, 0x10004787, 0x10004816, 0x10004895
"""
functions_by_bb = {}
for function, info in doc["meta"]["analysis"]["layout"]["functions"].items():
for bb in info["matched_basic_blocks"]:
functions_by_bb[bb] = function

had_match = False
for rule in rutils.capability_rules(doc):
count = len(rule["matches"])
Expand Down Expand Up @@ -247,7 +252,12 @@ def render_rules(ostream, doc):
for location, match in sorted(doc["rules"][rule["meta"]["name"]]["matches"].items()):
ostream.write(rule["meta"]["scope"])
ostream.write(" @ ")
ostream.writeln(rutils.hex(location))
ostream.write(rutils.hex(location))

if rule["meta"]["scope"] == capa.rules.BASIC_BLOCK_SCOPE:
ostream.write(" in function " + rutils.hex(functions_by_bb[location]))

ostream.write("\n")
render_match(ostream, match, indent=1)
ostream.write("\n")

Expand Down
1 change: 1 addition & 0 deletions scripts/bulk-process.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ def get_capa_results(args):
meta = capa.main.collect_metadata("", path, "", extractor)
capabilities, counts = capa.main.find_capabilities(rules, extractor, disable_progress=True)
meta["analysis"].update(counts)
meta["analysis"]["layout"] = capa.main.compute_layout(rules, extractor, capabilities)

return {
"path": path,
Expand Down
9 changes: 5 additions & 4 deletions scripts/capa_as_library.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,14 +163,15 @@ def render_dictionary(doc):

# ==== render dictionary helpers
def capa_details(file_path, output_format="dictionary"):

# collect metadata (used only to make rendering more complete)
meta = capa.main.collect_metadata("", file_path, RULES_PATH, extractor)

# extract features and find capabilities
extractor = capa.main.get_extractor(file_path, "auto", capa.main.BACKEND_VIV, [], False, disable_progress=True)
capabilities, counts = capa.main.find_capabilities(rules, extractor, disable_progress=True)

# collect metadata (used only to make rendering more complete)
meta = capa.main.collect_metadata("", file_path, RULES_PATH, extractor)

meta["analysis"].update(counts)
meta["analysis"]["layout"] = capa.main.compute_layout(rules, extractor, capabilities)

capa_output = False
if output_format == "dictionary":
Expand Down
23 changes: 17 additions & 6 deletions scripts/show-capabilities-by-function.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,22 +87,34 @@ def render_matches_by_function(doc):
- send HTTP request
- connect to HTTP server
"""
functions_by_bb = {}
for function, info in doc["meta"]["analysis"]["layout"]["functions"].items():
for bb in info["matched_basic_blocks"]:
functions_by_bb[bb] = function

ostream = rutils.StringIO()

matches_by_function = collections.defaultdict(set)
for rule in rutils.capability_rules(doc):
for va in rule["matches"].keys():
matches_by_function[va].add(rule["meta"]["name"])
if rule["meta"]["scope"] == capa.rules.FUNCTION_SCOPE:
for va in rule["matches"].keys():
matches_by_function[va].add(rule["meta"]["name"])
elif rule["meta"]["scope"] == capa.rules.BASIC_BLOCK_SCOPE:
for va in rule["matches"].keys():
function = functions_by_bb[va]
matches_by_function[function].add(rule["meta"]["name"])
else:
# file scope
pass

for va, feature_count in sorted(doc["meta"]["analysis"]["feature_counts"]["functions"].items()):
va = int(va)
if not matches_by_function.get(va, {}):
continue
ostream.writeln("function at 0x%X with %d features: " % (va, feature_count))
for rule_name in matches_by_function[va]:
for rule_name in sorted(matches_by_function[va]):
ostream.writeln(" - " + rule_name)

ostream.write("\n")
return ostream.getvalue()


Expand Down Expand Up @@ -174,6 +186,7 @@ def main(argv=None):
meta = capa.main.collect_metadata(argv, args.sample, args.rules, extractor)
capabilities, counts = capa.main.find_capabilities(rules, extractor)
meta["analysis"].update(counts)
meta["analysis"]["layout"] = capa.main.compute_layout(rules, extractor, capabilities)

if capa.main.has_file_limitation(rules, capabilities):
# bail if capa encountered file limitation e.g. a packed binary
Expand All @@ -190,8 +203,6 @@ def main(argv=None):
print(render_matches_by_function(doc))
colorama.deinit()

logger.info("done.")

return 0


Expand Down