Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

add format to global features and code refactors #1284

Merged
merged 4 commits on Jan 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@
- show-features: better render strings with embedded whitespace #1267 @williballenthin
- handle vivisect bug around strings at instruction level, use min length 4 #1271 @williballenthin @mr-tz
- extractor: guard against invalid "calls from" features #1177 @mr-tz
- extractor: add format to global features #1258 @mr-tz

### capa explorer IDA Pro plugin
- fix: display instruction items #1154 @mr-tz
Expand Down
3 changes: 2 additions & 1 deletion capa/features/extractors/dnfile/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,13 @@

from __future__ import annotations

from enum import Enum
from typing import Dict, List, Tuple, Union, Iterator, Optional

import dnfile
from dncil.cil.opcode import OpCodes

import capa.features.extractors
import capa.features.extractors.dotnetfile
import capa.features.extractors.dnfile.file
import capa.features.extractors.dnfile.insn
import capa.features.extractors.dnfile.function
Expand Down Expand Up @@ -78,6 +78,7 @@ def __init__(self, path: str):

# pre-compute these because we'll yield them at *every* scope.
self.global_features: List[Tuple[Feature, Address]] = []
self.global_features.extend(capa.features.extractors.dotnetfile.extract_file_format())
self.global_features.extend(capa.features.extractors.dotnetfile.extract_file_os(pe=self.pe))
self.global_features.extend(capa.features.extractors.dotnetfile.extract_file_arch(pe=self.pe))

Expand Down
1 change: 1 addition & 0 deletions capa/features/extractors/ida/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ class IdaFeatureExtractor(FeatureExtractor):
def __init__(self):
super().__init__()
self.global_features: List[Tuple[Feature, Address]] = []
self.global_features.extend(capa.features.extractors.ida.file.extract_file_format())
self.global_features.extend(capa.features.extractors.ida.global_.extract_os())
self.global_features.extend(capa.features.extractors.ida.global_.extract_arch())

Expand Down
1 change: 1 addition & 0 deletions capa/features/extractors/viv/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ def __init__(self, vw, path):

# pre-compute these because we'll yield them at *every* scope.
self.global_features: List[Tuple[Feature, Address]] = []
self.global_features.extend(capa.features.extractors.viv.file.extract_file_format(self.buf))
self.global_features.extend(capa.features.extractors.common.extract_os(self.buf))
self.global_features.extend(capa.features.extractors.viv.global_.extract_arch(self.vw))

Expand Down
8 changes: 7 additions & 1 deletion capa/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from typing import NoReturn

from capa.exceptions import UnsupportedFormatError
from capa.features.common import FORMAT_SC32, FORMAT_SC64, FORMAT_UNKNOWN
from capa.features.common import FORMAT_PE, FORMAT_SC32, FORMAT_SC64, FORMAT_DOTNET, FORMAT_UNKNOWN, Format

EXTENSIONS_SHELLCODE_32 = ("sc32", "raw32")
EXTENSIONS_SHELLCODE_64 = ("sc64", "raw64")
Expand Down Expand Up @@ -68,11 +68,17 @@ def get_auto_format(path: str) -> str:
def get_format(sample: str) -> str:
# imported locally to avoid import cycle
from capa.features.extractors.common import extract_format
from capa.features.extractors.dnfile_ import DnfileFeatureExtractor

with open(sample, "rb") as f:
buf = f.read()

for feature, _ in extract_format(buf):
if feature == Format(FORMAT_PE):
dnfile_extractor = DnfileFeatureExtractor(sample)
if dnfile_extractor.is_dotnet_file():
feature = Format(FORMAT_DOTNET)

assert isinstance(feature.value, str)
return feature.value

Expand Down
22 changes: 10 additions & 12 deletions capa/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
import itertools
import contextlib
import collections
from typing import Any, Dict, List, Tuple, Optional
from typing import Any, Dict, List, Tuple

import halo
import tqdm
Expand Down Expand Up @@ -535,12 +535,12 @@ def get_extractor(
def get_file_extractors(sample: str, format_: str) -> List[FeatureExtractor]:
file_extractors: List[FeatureExtractor] = list()

if format_ == capa.features.extractors.common.FORMAT_PE:
if format_ == FORMAT_PE:
file_extractors.append(capa.features.extractors.pefile.PefileFeatureExtractor(sample))

dnfile_extractor = capa.features.extractors.dnfile_.DnfileFeatureExtractor(sample)
if dnfile_extractor.is_dotnet_file():
file_extractors.append(dnfile_extractor)
elif format_ == FORMAT_DOTNET:
file_extractors.append(capa.features.extractors.pefile.PefileFeatureExtractor(sample))
file_extractors.append(capa.features.extractors.dnfile_.DnfileFeatureExtractor(sample))

elif format_ == capa.features.extractors.common.FORMAT_ELF:
file_extractors.append(capa.features.extractors.elffile.ElfFeatureExtractor(sample))
Expand Down Expand Up @@ -646,7 +646,6 @@ def collect_metadata(
sample_path: str,
rules_path: List[str],
extractor: capa.features.extractors.base_extractor.FeatureExtractor,
format_: Optional[str] = None,
):
md5 = hashlib.md5()
sha1 = hashlib.sha1()
Expand All @@ -662,8 +661,7 @@ def collect_metadata(
if rules_path != [RULES_PATH_DEFAULT_STRING]:
rules_path = [os.path.abspath(os.path.normpath(r)) for r in rules_path]

if format_ is None:
format_ = get_format(sample_path)
format_ = get_format(sample_path)
arch = get_arch(sample_path)
os_ = get_os(sample_path)

Expand Down Expand Up @@ -996,6 +994,9 @@ def main(argv=None):
if format_ == FORMAT_AUTO:
try:
format_ = get_auto_format(args.sample)
except PEFormatError as e:
logger.error("Input file '%s' is not a valid PE file: %s", args.sample, str(e))
return E_CORRUPT_FILE
except UnsupportedFormatError:
log_unsupported_format_error()
return E_INVALID_FILE_TYPE
Expand Down Expand Up @@ -1058,9 +1059,6 @@ def main(argv=None):
logger.error("Input file '%s' is not a valid ELF file: %s", args.sample, str(e))
return E_CORRUPT_FILE

if isinstance(file_extractor, capa.features.extractors.dnfile_.DnfileFeatureExtractor):
format_ = FORMAT_DOTNET

# file limitations that rely on non-file scope won't be detected here,
# nor will those that rely on FunctionName features, because pefile doesn't support them.
if has_file_limitation(rules, pure_file_capabilities):
Expand Down Expand Up @@ -1100,7 +1098,7 @@ def main(argv=None):
log_unsupported_os_error()
return E_INVALID_FILE_OS

meta = collect_metadata(argv, args.sample, args.rules, extractor, format_=format_)
meta = collect_metadata(argv, args.sample, args.rules, extractor)

capabilities, counts = find_capabilities(rules, extractor, disable_progress=args.quiet)
meta["analysis"].update(counts)
Expand Down
6 changes: 1 addition & 5 deletions scripts/lint.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,11 +307,7 @@ def get_sample_capabilities(ctx: Context, path: Path) -> Set[str]:
elif nice_path.endswith(capa.helpers.EXTENSIONS_SHELLCODE_64):
format_ = "sc64"
else:
format_ = "auto"
if not nice_path.endswith(capa.helpers.EXTENSIONS_ELF):
dnfile_extractor = capa.features.extractors.dnfile_.DnfileFeatureExtractor(nice_path)
if dnfile_extractor.is_dotnet_file():
format_ = FORMAT_DOTNET
format_ = capa.main.get_auto_format(nice_path)

logger.debug("analyzing sample: %s", nice_path)
extractor = capa.main.get_extractor(nice_path, format_, "", DEFAULT_SIGNATURES, False, disable_progress=True)
Expand Down
2 changes: 1 addition & 1 deletion scripts/show-capabilities-by-function.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ def main(argv=None):
capa.helpers.log_unsupported_runtime_error()
return -1

meta = capa.main.collect_metadata(argv, args.sample, args.rules, extractor, format_=format_)
meta = capa.main.collect_metadata(argv, args.sample, args.rules, extractor)
capabilities, counts = capa.main.find_capabilities(rules, extractor)
meta["analysis"].update(counts)
meta["analysis"]["layout"] = capa.main.compute_layout(rules, extractor, capabilities)
Expand Down
8 changes: 8 additions & 0 deletions tests/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -689,14 +689,22 @@ def parametrize(params, values, **kwargs):
# os & format & arch
("pma16-01", "file", OS(OS_WINDOWS), True),
("pma16-01", "file", OS(OS_LINUX), False),
("mimikatz", "file", OS(OS_WINDOWS), True),
("pma16-01", "function=0x404356", OS(OS_WINDOWS), True),
("pma16-01", "function=0x404356,bb=0x4043B9", OS(OS_WINDOWS), True),
("mimikatz", "function=0x40105D", OS(OS_WINDOWS), True),
("pma16-01", "file", Arch(ARCH_I386), True),
("pma16-01", "file", Arch(ARCH_AMD64), False),
("mimikatz", "file", Arch(ARCH_I386), True),
("pma16-01", "function=0x404356", Arch(ARCH_I386), True),
("pma16-01", "function=0x404356,bb=0x4043B9", Arch(ARCH_I386), True),
("mimikatz", "function=0x40105D", Arch(ARCH_I386), True),
("pma16-01", "file", Format(FORMAT_PE), True),
("pma16-01", "file", Format(FORMAT_ELF), False),
("mimikatz", "file", Format(FORMAT_PE), True),
# format is also a global feature
("pma16-01", "function=0x404356", Format(FORMAT_PE), True),
("mimikatz", "function=0x456BB9", Format(FORMAT_PE), True),
# elf support
("7351f.elf", "file", OS(OS_LINUX), True),
("7351f.elf", "file", OS(OS_WINDOWS), False),
Expand Down