Skip to content

Commit

Permalink
refactor main to for ease of integration (#1948)
Browse files Browse the repository at this point in the history
* main: split main into a bunch of "main routines"

[wip] since there are a few references to BinExport2
that are in progress elsewhre. Next commit will remove them.

* main: remove references to wip BinExport2 code

* changelog

* main: rename first position argument "input_file"

closes #1946

* main: linters

* main: move rule-related routines to capa.rules

ref #1821

* main: extract routines to capa.loader module

closes #1821

* add loader module

* loader: learn to load freeze format

* freeze: use new cli arg handling

* Update capa/loader.py

Co-authored-by: Moritz <mr-tz@users.noreply.github.com>

* main: remove duplicate documentation

* main: add doc about where some functions live

* scripts: migrate to new main wrapper helper functions

* scripts: port to main routines

* main: better handle auto-detection of backend

* scripts: migrate bulk-process to main wrappers

* scripts: migrate scripts to main wrappers

* main: rename *_from_args to *_from_cli

* changelog

* cache-ruleset: remove duplication

* main: fix tag handling

* cache-ruleset: fix cli args

* cache-ruleset: fix special rule cli handling

* scripts: fix type bytes

* main: remove old TODO message

* loader: fix references to binja extractor

---------

Co-authored-by: Moritz <mr-tz@users.noreply.github.com>
  • Loading branch information
williballenthin and mr-tz authored Jan 29, 2024
1 parent d2e1a47 commit c3301d3
Show file tree
Hide file tree
Showing 26 changed files with 1,295 additions and 1,142 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@

### Breaking Changes

- main: introduce wrapping routines within main for working with CLI args #1813 @williballenthin
- move functions from `capa.main` to new `capa.loader` namespace #1821 @williballenthin

### New Rules (0)

-
Expand Down
8 changes: 6 additions & 2 deletions capa/features/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -458,18 +458,22 @@ def evaluate(self, ctx, **kwargs):
FORMAT_SC32 = "sc32"
FORMAT_SC64 = "sc64"
FORMAT_CAPE = "cape"
FORMAT_FREEZE = "freeze"
FORMAT_RESULT = "result"
STATIC_FORMATS = {
FORMAT_SC32,
FORMAT_SC64,
FORMAT_PE,
FORMAT_ELF,
FORMAT_DOTNET,
FORMAT_FREEZE,
FORMAT_RESULT,
}
DYNAMIC_FORMATS = {
FORMAT_CAPE,
FORMAT_FREEZE,
FORMAT_RESULT,
}
FORMAT_FREEZE = "freeze"
FORMAT_RESULT = "result"
FORMAT_UNKNOWN = "unknown"


Expand Down
4 changes: 2 additions & 2 deletions capa/features/extractors/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
MATCH_JSON_OBJECT = b'{"'


def extract_file_strings(buf, **kwargs) -> Iterator[Tuple[String, Address]]:
def extract_file_strings(buf: bytes, **kwargs) -> Iterator[Tuple[String, Address]]:
"""
extract ASCII and UTF-16 LE strings from file
"""
Expand All @@ -56,7 +56,7 @@ def extract_file_strings(buf, **kwargs) -> Iterator[Tuple[String, Address]]:
yield String(s.s), FileOffsetAddress(s.offset)


def extract_format(buf) -> Iterator[Tuple[Feature, Address]]:
def extract_format(buf: bytes) -> Iterator[Tuple[Feature, Address]]:
if buf.startswith(MATCH_PE):
yield Format(FORMAT_PE), NO_ADDRESS
elif buf.startswith(MATCH_ELF):
Expand Down
15 changes: 10 additions & 5 deletions capa/features/freeze/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
# https://github.com/mandiant/capa/issues/1699
from typing_extensions import TypeAlias

import capa.loader
import capa.helpers
import capa.version
import capa.features.file
Expand Down Expand Up @@ -681,14 +682,18 @@ def main(argv=None):
argv = sys.argv[1:]

parser = argparse.ArgumentParser(description="save capa features to a file")
capa.main.install_common_args(parser, {"sample", "format", "backend", "os", "signatures"})
capa.main.install_common_args(parser, {"input_file", "format", "backend", "os", "signatures"})
parser.add_argument("output", type=str, help="Path to output file")
args = parser.parse_args(args=argv)
capa.main.handle_common_args(args)

sigpaths = capa.main.get_signatures(args.signatures)

extractor = capa.main.get_extractor(args.sample, args.format, args.os, args.backend, sigpaths, False)
try:
capa.main.handle_common_args(args)
capa.main.ensure_input_exists_from_cli(args)
input_format = capa.main.get_input_format_from_cli(args)
backend = capa.main.get_backend_from_cli(args, input_format)
extractor = capa.main.get_extractor_from_cli(args, input_format, backend)
except capa.main.ShouldExitError as e:
return e.status_code

Path(args.output).write_bytes(dump(extractor))

Expand Down
8 changes: 4 additions & 4 deletions capa/ghidra/capa_ghidra.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def run_headless():
rules_path = pathlib.Path(args.rules)

logger.debug("rule path: %s", rules_path)
rules = capa.main.get_rules([rules_path])
rules = capa.rules.get_rules([rules_path])

meta = capa.ghidra.helpers.collect_metadata([rules_path])
extractor = capa.features.extractors.ghidra.extractor.GhidraFeatureExtractor()
Expand All @@ -78,7 +78,7 @@ def run_headless():

meta.analysis.feature_counts = counts["feature_counts"]
meta.analysis.library_functions = counts["library_functions"]
meta.analysis.layout = capa.main.compute_layout(rules, extractor, capabilities)
meta.analysis.layout = capa.loader.compute_layout(rules, extractor, capabilities)

if capa.capabilities.common.has_file_limitation(rules, capabilities, is_standalone=True):
logger.info("capa encountered warnings during analysis")
Expand Down Expand Up @@ -119,7 +119,7 @@ def run_ui():
rules_path: pathlib.Path = pathlib.Path(rules_dir)
logger.info("running capa using rules from %s", str(rules_path))

rules = capa.main.get_rules([rules_path])
rules = capa.rules.get_rules([rules_path])

meta = capa.ghidra.helpers.collect_metadata([rules_path])
extractor = capa.features.extractors.ghidra.extractor.GhidraFeatureExtractor()
Expand All @@ -128,7 +128,7 @@ def run_ui():

meta.analysis.feature_counts = counts["feature_counts"]
meta.analysis.library_functions = counts["library_functions"]
meta.analysis.layout = capa.main.compute_layout(rules, extractor, capabilities)
meta.analysis.layout = capa.loader.compute_layout(rules, extractor, capabilities)

if capa.capabilities.common.has_file_limitation(rules, capabilities, is_standalone=False):
logger.info("capa encountered warnings during analysis")
Expand Down
28 changes: 27 additions & 1 deletion capa/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import sys
import json
import inspect
import logging
Expand All @@ -16,12 +17,22 @@
import tqdm

from capa.exceptions import UnsupportedFormatError
from capa.features.common import FORMAT_PE, FORMAT_CAPE, FORMAT_SC32, FORMAT_SC64, FORMAT_DOTNET, FORMAT_UNKNOWN, Format
from capa.features.common import (
FORMAT_PE,
FORMAT_CAPE,
FORMAT_SC32,
FORMAT_SC64,
FORMAT_DOTNET,
FORMAT_FREEZE,
FORMAT_UNKNOWN,
Format,
)

EXTENSIONS_SHELLCODE_32 = ("sc32", "raw32")
EXTENSIONS_SHELLCODE_64 = ("sc64", "raw64")
EXTENSIONS_DYNAMIC = ("json", "json_")
EXTENSIONS_ELF = "elf_"
EXTENSIONS_FREEZE = "frz"

logger = logging.getLogger("capa")

Expand Down Expand Up @@ -81,6 +92,8 @@ def get_format_from_extension(sample: Path) -> str:
format_ = FORMAT_SC64
elif sample.name.endswith(EXTENSIONS_DYNAMIC):
format_ = get_format_from_report(sample)
elif sample.name.endswith(EXTENSIONS_FREEZE):
format_ = FORMAT_FREEZE
return format_


Expand Down Expand Up @@ -201,3 +214,16 @@ def log_unsupported_runtime_error():
" If you're seeing this message on the command line, please ensure you're running a supported Python version."
)
logger.error("-" * 80)


def is_running_standalone() -> bool:
"""
are we running from a PyInstaller'd executable?
if so, then we'll be able to access `sys._MEIPASS` for the packaged resources.
"""
# typically we only expect capa.main to be packaged via PyInstaller.
# therefore, this *should* be in capa.main; however,
# the Binary Ninja extractor uses this to resolve the BN API code,
# so we keep this in a common area.
# generally, other library code should not use this function.
return hasattr(sys, "frozen") and hasattr(sys, "_MEIPASS")
4 changes: 2 additions & 2 deletions capa/ida/plugin/form.py
Original file line number Diff line number Diff line change
Expand Up @@ -636,7 +636,7 @@ def on_load_rule(_, i, total):
if ida_kernwin.user_cancelled():
raise UserCancelledError("user cancelled")

return capa.main.get_rules([rule_path], on_load_rule=on_load_rule)
return capa.rules.get_rules([rule_path], on_load_rule=on_load_rule)
except UserCancelledError:
logger.info("User cancelled analysis.")
return None
Expand Down Expand Up @@ -775,7 +775,7 @@ def slot_progress_feature_extraction(text):

meta.analysis.feature_counts = counts["feature_counts"]
meta.analysis.library_functions = counts["library_functions"]
meta.analysis.layout = capa.main.compute_layout(ruleset, self.feature_extractor, capabilities)
meta.analysis.layout = capa.loader.compute_layout(ruleset, self.feature_extractor, capabilities)
except UserCancelledError:
logger.info("User cancelled analysis.")
return False
Expand Down
Loading

0 comments on commit c3301d3

Please sign in to comment.