Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Binja backend #1343

Merged
merged 24 commits into from
Mar 24, 2023
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
42e8776
Update gitignore for pipfile and cache folder
xusheng6 Feb 27, 2023
b49213b
Include the type of value when the value of a Number is unexpected
xusheng6 Mar 3, 2023
1babdb0
Update readme for generating rule cache
xusheng6 Mar 4, 2023
4a698ff
Add a Binary Ninja backend for capa
xusheng6 Feb 24, 2023
c648af2
Select a different test file for the nzxor feature
xusheng6 Mar 4, 2023
b4974a8
Fix typo in OS name
xusheng6 Mar 7, 2023
64c5425
Fix the placement of some imports
xusheng6 Mar 7, 2023
e9d4a23
Do MLIL basic block look-up in get_basic_blocks to avoid a O(n^2) alg…
xusheng6 Mar 9, 2023
70f6f1c
Use the binja extractor to get functions/basic blocks/instructions wh…
xusheng6 Mar 9, 2023
64323b3
Encode the path with utf8 and then convert to hex in find_binja_path
xusheng6 Mar 9, 2023
7d12942
Merge branch 'binja_backend' of github.com:Vector35/capa into Vector3…
williballenthin Mar 23, 2023
c249b84
tests: binja: ensure the license is valid
williballenthin Mar 23, 2023
1a29077
tests: binja: don't crash on bad license - log instead
williballenthin Mar 23, 2023
8f8a0b1
ci: add test workflow for binja testing
williballenthin Mar 23, 2023
21ecc76
ci: binja: fix curl
williballenthin Mar 23, 2023
ad442aa
ci: binja: fix curl output
williballenthin Mar 23, 2023
dcddef0
ci: binja: inject secrets
williballenthin Mar 23, 2023
d8aa276
tests: debug binja api
williballenthin Mar 23, 2023
3be7bbb
ci: binja: log more
williballenthin Mar 23, 2023
8bde277
ci: binja: update installer to use root
williballenthin Mar 23, 2023
613ce92
tests: remove old debugging statements
williballenthin Mar 23, 2023
89803e7
ci: add binary ninja installation and test invocation
williballenthin Mar 23, 2023
0d4a92a
gitignore
williballenthin Mar 23, 2023
986e2e6
Merge pull request #1 from mandiant/binja-ci
xusheng6 Mar 24, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/pyinstaller/pyinstaller.spec
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ a = Analysis(
"qt5",
"pyqtwebengine",
"pyasn1",
"binaryninja",
],
)

Expand Down
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -122,3 +122,6 @@ scripts/perf/*.zip
.envrc
.DS_Store
*/.DS_Store
Pipfile
Pipfile.lock
/cache/
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

### New Features

- extractor: add Binary Ninja feature extractor @xusheng6

### Breaking Changes

### New Rules (12)
Expand Down
Empty file.
147 changes: 147 additions & 0 deletions capa/features/extractors/binja/basicblock.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.

import sys
import string
import struct
from typing import Tuple, Iterator

from binaryninja import Function, Variable
from binaryninja import BasicBlock as BinjaBasicBlock
from binaryninja import (
BinaryView,
VariableSourceType,
MediumLevelILSetVar,
MediumLevelILOperation,
MediumLevelILInstruction,
)

from capa.features.common import Feature, Characteristic
from capa.features.address import Address, AbsoluteVirtualAddress
from capa.features.basicblock import BasicBlock
from capa.features.extractors.helpers import MIN_STACKSTRING_LEN
from capa.features.extractors.base_extractor import BBHandle, FunctionHandle


def get_printable_len(il: MediumLevelILSetVar) -> int:
    """Return string length if all operand bytes are ascii or utf16-le printable

    The constant assigned by `il` is serialized little-endian using the width
    of the destination variable's type (1, 2, 4 or 8 bytes). Any other width
    yields 0.

    returns:
      the width in bytes when every byte is printable ascii,
      half the width when the bytes form printable utf16-le (every odd byte is zero),
      otherwise 0.
    """
    size = il.dest.type.width
    constant = il.src.value.value

    if size not in (1, 2, 4, 8):
        return 0

    # truncate to the destination width so negative/oversized constants serialize cleanly
    raw = (constant & ((1 << (8 * size)) - 1)).to_bytes(size, "little")

    # every character of string.printable is below 127, so membership alone is sufficient
    allowed = frozenset(string.printable.encode("ascii"))

    def is_printable_ascii(data: bytes) -> bool:
        return all(byte in allowed for byte in data)

    def is_printable_utf16le(data: bytes) -> bool:
        # a non-zero high byte means this is not an ascii-range utf16-le character
        if any(data[1::2]):
            return False
        return is_printable_ascii(data[::2])

    if is_printable_ascii(raw):
        return size

    if is_printable_utf16le(raw):
        return size // 2

    return 0


def is_mov_imm_to_stack(il: MediumLevelILInstruction) -> bool:
    """verify instruction moves immediate onto stack

    true only when `il` is an MLIL_SET_VAR whose source is an MLIL_CONST
    and whose destination variable has the stack source type.
    """
    # the checks short-circuit left to right, so `src` and `dest` are only
    # touched once the instruction is known to be a SET_VAR
    return (
        il.operation == MediumLevelILOperation.MLIL_SET_VAR
        and il.src.operation == MediumLevelILOperation.MLIL_CONST
        and il.dest.source_type == VariableSourceType.StackVariableSourceType
    )


def bb_contains_stackstring(f: Function, bb: BinjaBasicBlock) -> bool:
    """check basic block for stackstring indicators

    true if basic block contains enough moves of constant bytes to the stack

    args:
      f: the containing function; not consulted here, the MLIL blocks are
         reached through `bb.function` instead.
      bb: the disassembly-level basic block to inspect.
    """
    # collect the MLIL basic blocks whose source block starts where `bb` starts
    matching = list(
        filter(
            lambda candidate: candidate.source_block.start == bb.start,
            bb.function.mlil_basic_blocks,
        )
    )

    printable_total = 0
    for mlil_block in matching:
        for insn in mlil_block:
            if not is_mov_imm_to_stack(insn):
                continue

            printable_total += get_printable_len(insn)
            # stop as soon as the running total exceeds the threshold
            if printable_total > MIN_STACKSTRING_LEN:
                return True

    return False


def extract_bb_stackstring(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
    """extract stackstring indicators from basic block

    yields a single "stack string" characteristic, located at the basic block
    address, when the block passes `bb_contains_stackstring`.
    """
    bb: BinjaBasicBlock = bbh.inner
    if not bb_contains_stackstring(fh.inner, bb):
        return

    yield Characteristic("stack string"), bbh.address


def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
    """extract tight loop indicators from a basic block

    a block is a tight loop when at least one of its outgoing edges targets
    the block itself.

    yields at most one "tight loop" characteristic, located at the basic block
    address; a block with several self-targeting edges is still reported once
    rather than once per edge.
    """
    bb: BinjaBasicBlock = bbh.inner
    if any(edge.target.start == bb.start for edge in bb.outgoing_edges):
        yield Characteristic("tight loop"), bbh.address


def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
    """extract basic block features

    runs every handler listed in BASIC_BLOCK_HANDLERS against the block and
    relays what they produce, then finishes with the BasicBlock feature itself
    at the block address.
    """
    for handler in BASIC_BLOCK_HANDLERS:
        yield from handler(fh, bbh)

    yield BasicBlock(), bbh.address


# basic block feature handlers; extract_features calls each one, in this order,
# with (fh, bbh) and relays the (feature, address) pairs it yields.
BASIC_BLOCK_HANDLERS = (
extract_bb_tight_loop,
extract_bb_stackstring,
)


def main():
    """debugging entry point: dump basic block features for a file

    loads the path given as the first command line argument into Binary Ninja,
    extracts the features of every basic block of every function, and
    pretty-prints the collected (feature, address) pairs.

    returns silently when no path is given or when no view can be opened.
    """
    if len(sys.argv) < 2:
        return

    # imported here so they are only needed when the module is run as a script
    import pprint

    from binaryninja import BinaryViewType

    bv: BinaryView = BinaryViewType.get_view_of_file(sys.argv[1])
    if bv is None:
        return

    features = []
    for f in bv.functions:
        fh = FunctionHandle(address=AbsoluteVirtualAddress(f.start), inner=f)
        for bb in f.basic_blocks:
            bbh = BBHandle(address=AbsoluteVirtualAddress(bb.start), inner=bb)
            # extend consumes the generator directly; no intermediate list needed
            features.extend(extract_features(fh, bbh))

    pprint.pprint(features)


if __name__ == "__main__":
    main()
68 changes: 68 additions & 0 deletions capa/features/extractors/binja/extractor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
from typing import List, Tuple, Iterator

import binaryninja as binja

import capa.features.extractors.elf
import capa.features.extractors.binja.file
import capa.features.extractors.binja.insn
import capa.features.extractors.binja.global_
import capa.features.extractors.binja.function
import capa.features.extractors.binja.basicblock
from capa.features.common import Feature
from capa.features.address import Address, AbsoluteVirtualAddress
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, FeatureExtractor


class BinjaFeatureExtractor(FeatureExtractor):
    """feature extractor backed by a Binary Ninja BinaryView

    global features (file format, OS, arch) are computed once in the
    constructor; everything else is delegated to the per-scope modules under
    capa.features.extractors.binja.
    """

    def __init__(self, bv: binja.BinaryView):
        super().__init__()
        self.bv = bv
        # computed eagerly, in this order, and replayed by extract_global_features
        self.global_features: List[Tuple[Feature, Address]] = [
            *capa.features.extractors.binja.file.extract_file_format(self.bv),
            *capa.features.extractors.binja.global_.extract_os(self.bv),
            *capa.features.extractors.binja.global_.extract_arch(self.bv),
        ]

    def get_base_address(self):
        """return the start of the view as an absolute virtual address"""
        base = self.bv.start
        return AbsoluteVirtualAddress(base)

    def extract_global_features(self):
        """yield the global features collected in the constructor"""
        for item in self.global_features:
            yield item

    def extract_file_features(self):
        """yield file-scope features for the view"""
        for item in capa.features.extractors.binja.file.extract_features(self.bv):
            yield item

    def get_functions(self) -> Iterator[FunctionHandle]:
        """yield a handle, addressed by function start, for each function in the view"""
        yield from (
            FunctionHandle(address=AbsoluteVirtualAddress(func.start), inner=func) for func in self.bv.functions
        )

    def extract_function_features(self, fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
        """yield function-scope features for `fh`"""
        for item in capa.features.extractors.binja.function.extract_features(fh):
            yield item

    def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]:
        """yield a handle, addressed by block start, for each basic block of the function"""
        func: binja.Function = fh.inner
        yield from (BBHandle(address=AbsoluteVirtualAddress(block.start), inner=block) for block in func.basic_blocks)

    def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
        """yield basic-block-scope features for `bbh`"""
        for item in capa.features.extractors.binja.basicblock.extract_features(fh, bbh):
            yield item

    def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]:
        """yield a handle for each instruction of the basic block

        iterating the block produces (text, length) pairs; the instruction
        address is not part of the pair, so it is reconstructed by starting at
        the block start and advancing by each instruction's length.
        """
        import capa.features.extractors.binja.helpers as binja_helpers

        block: binja.BasicBlock = bbh.inner
        cursor = block.start

        for tokens, size in block:
            wrapped = binja_helpers.DisassemblyInstruction(cursor, size, tokens)
            yield InsnHandle(address=AbsoluteVirtualAddress(cursor), inner=wrapped)
            cursor += size

    def extract_insn_features(self, fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle):
        """yield instruction-scope features for `ih`"""
        for item in capa.features.extractors.binja.insn.extract_features(fh, bbh, ih):
            yield item
Loading