Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

improve and fix various dynamic parts #1809

Merged
merged 2 commits into from
Oct 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions capa/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,7 @@ class UnsupportedArchError(ValueError):

class UnsupportedOSError(ValueError):
pass


class EmptyReportError(ValueError):
    """
    signals that a report carries no useful data to analyze,
    for example a CAPE report in which no processes were captured.
    """
2 changes: 1 addition & 1 deletion capa/features/extractors/cape/call.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]:
"""
this method extrcts the given call's features (such as API name and arguments),
this method extracts the given call's features (such as API name and arguments),
and returns them as API, Number, and String features.
args:
Expand Down
12 changes: 10 additions & 2 deletions capa/features/extractors/cape/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@
import capa.features.extractors.cape.thread
import capa.features.extractors.cape.global_
import capa.features.extractors.cape.process
from capa.exceptions import UnsupportedFormatError
from capa.exceptions import EmptyReportError, UnsupportedFormatError
from capa.features.common import Feature, Characteristic
from capa.features.address import NO_ADDRESS, Address, AbsoluteVirtualAddress, _NoAddress
from capa.features.extractors.cape.models import CapeReport
from capa.features.extractors.cape.models import Static, CapeReport
from capa.features.extractors.base_extractor import (
CallHandle,
SampleHashes,
Expand Down Expand Up @@ -85,10 +85,18 @@ def from_report(cls, report: Dict) -> "CapeExtractor":
if cr.info.version not in TESTED_VERSIONS:
logger.warning("CAPE version '%s' not tested/supported yet", cr.info.version)

# observed in 2.4-CAPE reports from capesandbox.com
if cr.static is None and cr.target.file.pe is not None:
cr.static = Static()
cr.static.pe = cr.target.file.pe
Comment on lines +89 to +91
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Noticed that some reports may store the PE data under `target.file.pe` instead of `static.pe`; maybe there's a better way to handle this?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please add a comment with a reference to such a report and CAPE version.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we want to add all/many/few reports to capa testfiles?
I'm pulling down sandbox data for all our current testfile EXEs and DLLs.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i think it would be good to have a fair collection, but not necessarily one for every sample.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

adding a few initially here: mandiant/capa-testfiles#217


if cr.static is None:
raise UnsupportedFormatError("CAPE report missing static analysis")

if cr.static.pe is None:
raise UnsupportedFormatError("CAPE report missing PE analysis")

if len(cr.behavior.processes) == 0:
raise EmptyReportError("CAPE did not capture any processes")
Comment on lines +99 to +100
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

such empty reports are fairly useless


return cls(cr)
22 changes: 17 additions & 5 deletions capa/features/extractors/cape/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,13 +132,21 @@ class DigitalSigner(FlexibleModel):
extensions_subjectKeyIdentifier: Optional[str] = None


class AuxSigner(ExactModel):
    """
    one entry of a signer's `aux_signers` list.

    the aliases below carry the human-readable key names ("Issued to", "SHA1 hash", ...)
    and bind them to identifier-friendly attribute names.
    NOTE(review): presumably `Field` is pydantic's and the aliases are the keys found in the report - confirm.
    """

    name: str
    # aliased fields: key names containing spaces cannot be used as attribute names directly
    issued_to: str = Field(alias="Issued to")
    issued_by: str = Field(alias="Issued by")
    expires: str = Field(alias="Expires")
    sha1_hash: str = Field(alias="SHA1 hash")


class Signer(ExactModel):
aux_sha1: Optional[TODO] = None
aux_timestamp: Optional[None] = None
aux_sha1: Optional[str] = None
aux_timestamp: Optional[str] = None
aux_valid: Optional[bool] = None
aux_error: Optional[bool] = None
aux_error_desc: Optional[str] = None
aux_signers: Optional[ListTODO] = None
aux_signers: Optional[List[AuxSigner]] = None


class Overlay(ExactModel):
Expand Down Expand Up @@ -197,7 +205,10 @@ class PE(ExactModel):
guest_signers: Signer


class File(ExactModel):
# TODO(mr-tz): target.file.dotnet, target.file.extracted_files, target.file.extracted_files_tool,
# target.file.extracted_files_time
# https://github.com/mandiant/capa/issues/1814
class File(FlexibleModel):
type: str
cape_type_code: Optional[int] = None
cape_type: Optional[str] = None
Expand Down Expand Up @@ -350,6 +361,7 @@ class Behavior(ExactModel):
class Target(ExactModel):
category: str
file: File
pe: Optional[PE] = None


class Static(ExactModel):
Expand Down Expand Up @@ -385,7 +397,7 @@ class CapeReport(FlexibleModel):
# post-processed results: payloads and extracted configs
CAPE: Optional[Cape] = None
dropped: Optional[List[File]] = None
procdump: List[ProcessFile]
procdump: Optional[List[ProcessFile]] = None
procmemory: ListTODO

# =========================================================================
Expand Down
22 changes: 20 additions & 2 deletions capa/features/extractors/cape/thread.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from typing import Iterator

from capa.features.address import DynamicCallAddress
from capa.features.extractors.helpers import is_aw_function
from capa.features.extractors.cape.models import Process
from capa.features.extractors.base_extractor import CallHandle, ThreadHandle, ProcessHandle

Expand All @@ -24,5 +25,22 @@ def get_calls(ph: ProcessHandle, th: ThreadHandle) -> Iterator[CallHandle]:
if call.thread_id != tid:
continue

addr = DynamicCallAddress(thread=th.address, id=call_index)
yield CallHandle(address=addr, inner=call)
for symbol in generate_symbols(call.api):
call.api = symbol

addr = DynamicCallAddress(thread=th.address, id=call_index)
yield CallHandle(address=addr, inner=call)
Comment on lines +28 to +32
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've noticed we run into issues because CAPE reports only list the API name as called (like CreateFileA), whereas:

  • rules often include the DLL (which we can't easily get here)
  • rules often use the generic name (like CreateFile, without the A/W suffix)

This is one way/part of handling this, maybe we can come up with a more generic way.



def generate_symbols(symbol: str) -> Iterator[str]:
    """
    yield name variants for the given symbol.

    the symbol itself is always yielded first. when `is_aw_function` accepts it,
    the name without its final character is yielded as well.
    we over-generate features to make matching easier.
    """
    # the name exactly as given, e.g. CreateFileA
    yield symbol

    if not is_aw_function(symbol):
        return

    # the name with its last character removed, e.g. CreateFile
    yield symbol[:-1]
12 changes: 10 additions & 2 deletions capa/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,9 +148,9 @@ def log_unsupported_format_error():
logger.error("-" * 80)


def log_unsupported_cape_report_error():
def log_unsupported_cape_report_error(error: str):
logger.error("-" * 80)
logger.error(" Input file is not a valid CAPE report.")
logger.error("Input file is not a valid CAPE report: %s", error)
logger.error(" ")
logger.error(" capa currently only supports analyzing standard CAPE json reports.")
logger.error(
Expand All @@ -159,6 +159,14 @@ def log_unsupported_cape_report_error():
logger.error("-" * 80)


def log_empty_cape_report_error(error: str):
    """
    log an error banner stating that the CAPE report is empty or holds little useful data.

    args:
      error: detail text interpolated into the first message line.
    """
    separator = "-" * 80

    logger.error(separator)
    logger.error(" CAPE report is empty or only contains little useful data: %s", error)
    logger.error(" ")
    logger.error(" Please make sure the sandbox run captures useful behaviour of your sample.")
    logger.error(separator)


def log_unsupported_os_error():
logger.error("-" * 80)
logger.error(" Input file does not appear to target a supported OS.")
Expand Down
25 changes: 20 additions & 5 deletions capa/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,17 @@
log_unsupported_os_error,
redirecting_print_to_tqdm,
log_unsupported_arch_error,
log_empty_cape_report_error,
log_unsupported_format_error,
log_unsupported_cape_report_error,
)
from capa.exceptions import UnsupportedOSError, UnsupportedArchError, UnsupportedFormatError, UnsupportedRuntimeError
from capa.exceptions import (
EmptyReportError,
UnsupportedOSError,
UnsupportedArchError,
UnsupportedFormatError,
UnsupportedRuntimeError,
)
from capa.features.common import (
OS_AUTO,
OS_LINUX,
Expand Down Expand Up @@ -1495,12 +1502,17 @@ def main(argv: Optional[List[str]] = None):
except (ELFError, OverflowError) as e:
logger.error("Input file '%s' is not a valid ELF file: %s", args.sample, str(e))
return E_CORRUPT_FILE
except UnsupportedFormatError:
except UnsupportedFormatError as e:
if format_ == FORMAT_CAPE:
log_unsupported_cape_report_error()
log_unsupported_cape_report_error(str(e))
else:
log_unsupported_format_error()
return E_INVALID_FILE_TYPE
except EmptyReportError as e:
if format_ == FORMAT_CAPE:
log_empty_cape_report_error(str(e))
else:
log_unsupported_format_error()

for file_extractor in file_extractors:
if isinstance(file_extractor, DynamicFeatureExtractor):
Expand Down Expand Up @@ -1561,6 +1573,9 @@ def main(argv: Optional[List[str]] = None):

should_save_workspace = os.environ.get("CAPA_SAVE_WORKSPACE") not in ("0", "no", "NO", "n", None)

# TODO(mr-tz): this should be wrapped and refactored as it's tedious to update everywhere
# see same code and show-features above examples
# https://github.com/mandiant/capa/issues/1813
try:
extractor = get_extractor(
args.sample,
Expand All @@ -1571,9 +1586,9 @@ def main(argv: Optional[List[str]] = None):
should_save_workspace,
disable_progress=args.quiet or args.debug,
)
except UnsupportedFormatError:
except UnsupportedFormatError as e:
if format_ == FORMAT_CAPE:
log_unsupported_cape_report_error()
log_unsupported_cape_report_error(str(e))
else:
log_unsupported_format_error()
return E_INVALID_FILE_TYPE
Expand Down
8 changes: 2 additions & 6 deletions scripts/lint.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,8 +216,8 @@ class InvalidScopes(Lint):
recommendation = "At least one scope (static or dynamic) must be specified"

def check_rule(self, ctx: Context, rule: Rule):
return (rule.meta.get("scope").get("static") in ("unspecified", "unsupported")) and (
rule.meta.get("scope").get("dynamic") in ("unspecified", "unsupported")
return (rule.meta.get("scopes").get("static") in ("unspecified", "unsupported")) and (
rule.meta.get("scopes").get("dynamic") in ("unspecified", "unsupported")
)


Expand Down Expand Up @@ -979,10 +979,6 @@ def main(argv=None):

default_samples_path = str(Path(__file__).resolve().parent.parent / "tests" / "data")

# TODO(yelhamer): remove once support for the legacy scope field has been added
# https://github.com/mandiant/capa/pull/1580
return 0

parser = argparse.ArgumentParser(description="Lint capa rules.")
capa.main.install_common_args(parser, wanted={"tag"})
parser.add_argument("rules", type=str, action="append", help="Path to rules")
Expand Down
23 changes: 17 additions & 6 deletions scripts/show-features.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,15 @@
import capa.features.extractors.pefile
from capa.helpers import get_auto_format, log_unsupported_runtime_error
from capa.features.insn import API, Number
from capa.features.common import FORMAT_AUTO, FORMAT_FREEZE, DYNAMIC_FORMATS, String, Feature, is_global_feature
from capa.features.common import (
FORMAT_AUTO,
FORMAT_CAPE,
FORMAT_FREEZE,
DYNAMIC_FORMATS,
String,
Feature,
is_global_feature,
)
from capa.features.extractors.base_extractor import FunctionHandle, StaticFeatureExtractor, DynamicFeatureExtractor

logger = logging.getLogger("capa.show-features")
Expand Down Expand Up @@ -132,8 +140,11 @@ def main(argv=None):
extractor = capa.main.get_extractor(
args.sample, format_, args.os, args.backend, sig_paths, should_save_workspace
)
except capa.exceptions.UnsupportedFormatError:
capa.helpers.log_unsupported_format_error()
except capa.exceptions.UnsupportedFormatError as e:
if format_ == FORMAT_CAPE:
capa.helpers.log_unsupported_cape_report_error(str(e))
else:
capa.helpers.log_unsupported_format_error()
return -1
except capa.exceptions.UnsupportedRuntimeError:
log_unsupported_runtime_error()
Expand Down Expand Up @@ -248,13 +259,13 @@ def print_static_features(functions, extractor: StaticFeatureExtractor):

def print_dynamic_features(processes, extractor: DynamicFeatureExtractor):
for p in processes:
print(f"proc: {p.inner['name']} (ppid={p.address.ppid}, pid={p.address.pid})")
print(f"proc: {p.inner.process_name} (ppid={p.address.ppid}, pid={p.address.pid})")

for feature, addr in extractor.extract_process_features(p):
if is_global_feature(feature):
continue

print(f" proc: {p.inner['name']}: {feature}")
print(f" proc: {p.inner.process_name}: {feature}")

for t in extractor.get_threads(p):
print(f" thread: {t.address.tid}")
Expand Down Expand Up @@ -283,7 +294,7 @@ def print_dynamic_features(processes, extractor: DynamicFeatureExtractor):
print(f" arguments=[{', '.join(arguments)}]")

for cid, api in apis:
print(f"call {cid}: {api}({', '.join(arguments)})")
print(f" call {cid}: {api}({', '.join(arguments)})")


def ida_main():
Expand Down
Loading