Skip to content

Commit

Permalink
Ignore DLL names for API features (#1824)
Browse files Browse the repository at this point in the history
* ignore DLL name for api features

* keep DLL name for import features

* fix tests
  • Loading branch information
mr-tz authored Oct 20, 2023
1 parent 62d4b00 commit c9df782
Show file tree
Hide file tree
Showing 13 changed files with 88 additions and 47 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
- protobuf: deprecate `RuleMetadata.scope` in favor of `RuleMetadata.scopes` @williballenthin
- protobuf: deprecate `Metadata.analysis` in favor of `Metadata.analysis2` that is dynamic analysis aware @williballenthin
- update freeze format to v3, adding support for dynamic analysis @williballenthin
- extractor: ignore DLL name for api features #1815 @mr-tz

### New Rules (19)

Expand Down
4 changes: 2 additions & 2 deletions capa/features/extractors/binja/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,13 +115,13 @@ def extract_file_import_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address
for sym in bv.get_symbols_of_type(SymbolType.ImportAddressSymbol):
lib_name = str(sym.namespace)
addr = AbsoluteVirtualAddress(sym.address)
for name in capa.features.extractors.helpers.generate_symbols(lib_name, sym.short_name):
for name in capa.features.extractors.helpers.generate_symbols(lib_name, sym.short_name, include_dll=True):
yield Import(name), addr

ordinal = sym.ordinal
if ordinal != 0 and (lib_name != ""):
ordinal_name = f"#{ordinal}"
for name in capa.features.extractors.helpers.generate_symbols(lib_name, ordinal_name):
for name in capa.features.extractors.helpers.generate_symbols(lib_name, ordinal_name, include_dll=True):
yield Import(name), addr


Expand Down
3 changes: 2 additions & 1 deletion capa/features/extractors/cape/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def extract_import_names(report: CapeReport) -> Iterator[Tuple[Feature, Address]
if not function.name:
continue

for name in generate_symbols(library.dll, function.name):
for name in generate_symbols(library.dll, function.name, include_dll=True):
yield Import(name), AbsoluteVirtualAddress(function.address)


Expand Down Expand Up @@ -126,6 +126,7 @@ def extract_features(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
extract_used_regkeys,
extract_used_files,
extract_used_mutexes,
extract_used_commands,
extract_used_apis,
extract_used_services,
)
18 changes: 2 additions & 16 deletions capa/features/extractors/cape/thread.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from typing import Iterator

from capa.features.address import DynamicCallAddress
from capa.features.extractors.helpers import is_aw_function
from capa.features.extractors.helpers import generate_symbols
from capa.features.extractors.cape.models import Process
from capa.features.extractors.base_extractor import CallHandle, ThreadHandle, ProcessHandle

Expand All @@ -25,22 +25,8 @@ def get_calls(ph: ProcessHandle, th: ThreadHandle) -> Iterator[CallHandle]:
if call.thread_id != tid:
continue

for symbol in generate_symbols(call.api):
for symbol in generate_symbols("", call.api):
call.api = symbol

addr = DynamicCallAddress(thread=th.address, id=call_index)
yield CallHandle(address=addr, inner=call)


def generate_symbols(symbol: str) -> Iterator[str]:
"""
for a given symbol name, generate variants.
we over-generate features to make matching easier.
"""

# CreateFileA
yield symbol

if is_aw_function(symbol):
# CreateFile
yield symbol[:-1]
2 changes: 1 addition & 1 deletion capa/features/extractors/dotnetfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def extract_file_import_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Impor

for imp in get_dotnet_unmanaged_imports(pe):
# like kernel32.CreateFileA
for name in capa.features.extractors.helpers.generate_symbols(imp.module, imp.method):
for name in capa.features.extractors.helpers.generate_symbols(imp.module, imp.method, include_dll=True):
yield Import(name), DNTokenAddress(imp.token)


Expand Down
2 changes: 1 addition & 1 deletion capa/features/extractors/ghidra/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ def extract_file_import_names() -> Iterator[Tuple[Feature, Address]]:
if "Ordinal_" in fstr[1]:
fstr[1] = f"#{fstr[1].split('_')[1]}"

for name in capa.features.extractors.helpers.generate_symbols(fstr[0][:-4], fstr[1]):
for name in capa.features.extractors.helpers.generate_symbols(fstr[0][:-4], fstr[1], include_dll=True):
yield Import(name), AbsoluteVirtualAddress(addr)


Expand Down
27 changes: 17 additions & 10 deletions capa/features/extractors/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,15 +41,20 @@ def is_ordinal(symbol: str) -> bool:
return False


def generate_symbols(dll: str, symbol: str) -> Iterator[str]:
def generate_symbols(dll: str, symbol: str, include_dll=False) -> Iterator[str]:
"""
for a given dll and symbol name, generate variants.
we over-generate features to make matching easier.
these include:
- kernel32.CreateFileA
- kernel32.CreateFile
- CreateFileA
- CreateFile
- ws2_32.#1
note that since capa v7 only `import` features include DLL names:
- kernel32.CreateFileA
- kernel32.CreateFile
for `api` features dll names are good for documentation but not used during matching
"""
# normalize dll name
dll = dll.lower()
Expand All @@ -58,25 +63,27 @@ def generate_symbols(dll: str, symbol: str) -> Iterator[str]:
dll = dll[0:-4] if dll.endswith(".dll") else dll
dll = dll[0:-4] if dll.endswith(".drv") else dll

# kernel32.CreateFileA
yield f"{dll}.{symbol}"
if include_dll:
# ws2_32.#1
# kernel32.CreateFileA
yield f"{dll}.{symbol}"

if not is_ordinal(symbol):
# CreateFileA
yield symbol

if is_aw_function(symbol):
# kernel32.CreateFile
yield f"{dll}.{symbol[:-1]}"
if include_dll:
# kernel32.CreateFile
yield f"{dll}.{symbol[:-1]}"

if not is_ordinal(symbol):
if is_aw_function(symbol):
# CreateFile
yield symbol[:-1]


def reformat_forwarded_export_name(forwarded_name: str) -> str:
"""
a forwarded export has a DLL name/path an symbol name.
a forwarded export has a DLL name/path and symbol name.
we want the former to be lowercase, and the latter to be verbatim.
"""

Expand Down
4 changes: 2 additions & 2 deletions capa/features/extractors/ida/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def extract_file_import_names() -> Iterator[Tuple[Feature, Address]]:
if info[1] and info[2]:
# e.g. in mimikatz: ('cabinet', 'FCIAddFile', 11L)
# extract by name here and by ordinal below
for name in capa.features.extractors.helpers.generate_symbols(info[0], info[1]):
for name in capa.features.extractors.helpers.generate_symbols(info[0], info[1], include_dll=True):
yield Import(name), addr
dll = info[0]
symbol = f"#{info[2]}"
Expand All @@ -123,7 +123,7 @@ def extract_file_import_names() -> Iterator[Tuple[Feature, Address]]:
else:
continue

for name in capa.features.extractors.helpers.generate_symbols(dll, symbol):
for name in capa.features.extractors.helpers.generate_symbols(dll, symbol, include_dll=True):
yield Import(name), addr

for ea, info in capa.features.extractors.ida.helpers.get_file_externs().items():
Expand Down
2 changes: 1 addition & 1 deletion capa/features/extractors/pefile.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def extract_file_import_names(pe, **kwargs):
except UnicodeDecodeError:
continue

for name in capa.features.extractors.helpers.generate_symbols(modname, impname):
for name in capa.features.extractors.helpers.generate_symbols(modname, impname, include_dll=True):
yield Import(name), AbsoluteVirtualAddress(imp.address)


Expand Down
2 changes: 1 addition & 1 deletion capa/features/extractors/viv/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def extract_file_import_names(vw, **kwargs) -> Iterator[Tuple[Feature, Address]]
impname = "#" + impname[len("ord") :]

addr = AbsoluteVirtualAddress(va)
for name in capa.features.extractors.helpers.generate_symbols(modname, impname):
for name in capa.features.extractors.helpers.generate_symbols(modname, impname, include_dll=True):
yield Import(name), addr


Expand Down
15 changes: 15 additions & 0 deletions capa/rules/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -595,6 +595,13 @@ def pop_statement_description_entry(d):
return description["description"]


def trim_dll_part(api: str) -> str:
# kernel32.CreateFileA
if api.count(".") == 1:
api = api.split(".")[1]
return api


def build_statements(d, scopes: Scopes):
if len(d.keys()) > 2:
raise InvalidRule("too many statements")
Expand Down Expand Up @@ -722,6 +729,10 @@ def build_statements(d, scopes: Scopes):
# count(number(0x100 = description))
if term != "string":
value, description = parse_description(arg, term)

if term == "api":
value = trim_dll_part(value)

feature = Feature(value, description=description)
else:
# arg is string (which doesn't support inline descriptions), like:
Expand Down Expand Up @@ -816,6 +827,10 @@ def build_statements(d, scopes: Scopes):
else:
Feature = parse_feature(key)
value, description = parse_description(d[key], key, d.get("description"))

if key == "api":
value = trim_dll_part(value)

try:
feature = Feature(value, description=description)
except ValueError as e:
Expand Down
31 changes: 20 additions & 11 deletions tests/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -779,6 +779,7 @@ def parametrize(params, values, **kwargs):
("mimikatz", "file", capa.features.file.Import("advapi32.CryptSetHashParam"), True),
("mimikatz", "file", capa.features.file.Import("CryptSetHashParam"), True),
("mimikatz", "file", capa.features.file.Import("kernel32.IsWow64Process"), True),
("mimikatz", "file", capa.features.file.Import("IsWow64Process"), True),
("mimikatz", "file", capa.features.file.Import("msvcrt.exit"), True),
("mimikatz", "file", capa.features.file.Import("cabinet.#11"), True),
("mimikatz", "file", capa.features.file.Import("#11"), False),
Expand Down Expand Up @@ -859,11 +860,12 @@ def parametrize(params, values, **kwargs):
# .text:004018C0 8D 4B 02 lea ecx, [ebx+2]
("mimikatz", "function=0x401873,bb=0x4018B2,insn=0x4018C0", capa.features.insn.Number(0x2), True),
# insn/api
("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptAcquireContextW"), True),
("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptAcquireContext"), True),
("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptGenKey"), True),
("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptImportKey"), True),
("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptDestroyKey"), True),
# not extracting dll anymore
("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptAcquireContextW"), False),
("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptAcquireContext"), False),
("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptGenKey"), False),
("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptImportKey"), False),
("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptDestroyKey"), False),
("mimikatz", "function=0x403BAC", capa.features.insn.API("CryptAcquireContextW"), True),
("mimikatz", "function=0x403BAC", capa.features.insn.API("CryptAcquireContext"), True),
("mimikatz", "function=0x403BAC", capa.features.insn.API("CryptGenKey"), True),
Expand All @@ -872,7 +874,8 @@ def parametrize(params, values, **kwargs):
("mimikatz", "function=0x403BAC", capa.features.insn.API("Nope"), False),
("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.Nope"), False),
# insn/api: thunk
("mimikatz", "function=0x4556E5", capa.features.insn.API("advapi32.LsaQueryInformationPolicy"), True),
# not extracting dll anymore
("mimikatz", "function=0x4556E5", capa.features.insn.API("advapi32.LsaQueryInformationPolicy"), False),
("mimikatz", "function=0x4556E5", capa.features.insn.API("LsaQueryInformationPolicy"), True),
# insn/api: x64
(
Expand All @@ -896,10 +899,15 @@ def parametrize(params, values, **kwargs):
("mimikatz", "function=0x40B3C6", capa.features.insn.API("LocalFree"), True),
("c91887...", "function=0x40156F", capa.features.insn.API("CloseClipboard"), True),
# insn/api: resolve indirect calls
("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.CreatePipe"), True),
("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.SetHandleInformation"), True),
("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.CloseHandle"), True),
("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.WriteFile"), True),
# not extracting dll anymore
("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.CreatePipe"), False),
("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.SetHandleInformation"), False),
("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.CloseHandle"), False),
("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.WriteFile"), False),
("c91887...", "function=0x401A77", capa.features.insn.API("CreatePipe"), True),
("c91887...", "function=0x401A77", capa.features.insn.API("SetHandleInformation"), True),
("c91887...", "function=0x401A77", capa.features.insn.API("CloseHandle"), True),
("c91887...", "function=0x401A77", capa.features.insn.API("WriteFile"), True),
# insn/string
("mimikatz", "function=0x40105D", capa.features.common.String("SCardControl"), True),
("mimikatz", "function=0x40105D", capa.features.common.String("SCardTransmit"), True),
Expand Down Expand Up @@ -1074,7 +1082,8 @@ def parametrize(params, values, **kwargs):
("_1c444", "file", capa.features.file.Import("CreateCompatibleBitmap"), True),
("_1c444", "file", capa.features.file.Import("gdi32::CreateCompatibleBitmap"), False),
("_1c444", "function=0x1F68", capa.features.insn.API("GetWindowDC"), True),
("_1c444", "function=0x1F68", capa.features.insn.API("user32.GetWindowDC"), True),
# not extracting dll anymore
("_1c444", "function=0x1F68", capa.features.insn.API("user32.GetWindowDC"), False),
("_1c444", "function=0x1F68", capa.features.insn.Number(0xCC0020), True),
("_1c444", "token=0x600001D", capa.features.common.Characteristic("calls to"), True),
("_1c444", "token=0x6000018", capa.features.common.Characteristic("calls to"), False),
Expand Down
24 changes: 23 additions & 1 deletion tests/test_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import capa.features.address
from capa.engine import Or
from capa.features.file import FunctionName
from capa.features.insn import Number, Offset, Property
from capa.features.insn import API, Number, Offset, Property
from capa.features.common import (
OS,
OS_LINUX,
Expand Down Expand Up @@ -937,6 +937,28 @@ def test_count_number_symbol():
assert bool(r.evaluate({Number(0x100, description="symbol name"): {ADDR1, ADDR2, ADDR3}})) is True


def test_count_api():
rule = textwrap.dedent(
"""
rule:
meta:
name: test rule
scopes:
static: function
dynamic: thread
features:
- or:
- count(api(kernel32.CreateFileA)): 1
"""
)
r = capa.rules.Rule.from_yaml(rule)
# apis including their DLL names are not extracted anymore
assert bool(r.evaluate({API("kernel32.CreateFileA"): set()})) is False
assert bool(r.evaluate({API("kernel32.CreateFile"): set()})) is False
assert bool(r.evaluate({API("CreateFile"): {ADDR1}})) is False
assert bool(r.evaluate({API("CreateFileA"): {ADDR1}})) is True


def test_invalid_number():
with pytest.raises(capa.rules.InvalidRule):
_ = capa.rules.Rule.from_yaml(
Expand Down

0 comments on commit c9df782

Please sign in to comment.