Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ignore DLL names for API features #1824

Merged
merged 5 commits into from
Oct 20, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
- remove the `SCOPE_*` constants in favor of the `Scope` enum #1764 @williballenthin
- protobuf: deprecate `RuleMetadata.scope` in favor of `RuleMetadata.scopes` @williballenthin
- protobuf: deprecate `Metadata.analysis` in favor of `Metadata.analysis2` that is dynamic analysis aware @williballenthin
- extractor: ignore DLL name for api features #1815 @mr-tz

### New Rules (19)

Expand Down
4 changes: 2 additions & 2 deletions capa/features/extractors/binja/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,13 +115,13 @@ def extract_file_import_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address
for sym in bv.get_symbols_of_type(SymbolType.ImportAddressSymbol):
lib_name = str(sym.namespace)
addr = AbsoluteVirtualAddress(sym.address)
for name in capa.features.extractors.helpers.generate_symbols(lib_name, sym.short_name):
for name in capa.features.extractors.helpers.generate_symbols(lib_name, sym.short_name, include_dll=True):
yield Import(name), addr

ordinal = sym.ordinal
if ordinal != 0 and (lib_name != ""):
ordinal_name = f"#{ordinal}"
for name in capa.features.extractors.helpers.generate_symbols(lib_name, ordinal_name):
for name in capa.features.extractors.helpers.generate_symbols(lib_name, ordinal_name, include_dll=True):
yield Import(name), addr


Expand Down
3 changes: 2 additions & 1 deletion capa/features/extractors/cape/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def extract_import_names(report: CapeReport) -> Iterator[Tuple[Feature, Address]
if not function.name:
continue

for name in generate_symbols(library.dll, function.name):
for name in generate_symbols(library.dll, function.name, include_dll=True):
yield Import(name), AbsoluteVirtualAddress(function.address)


Expand Down Expand Up @@ -126,6 +126,7 @@ def extract_features(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
extract_used_regkeys,
extract_used_files,
extract_used_mutexes,
extract_used_commands,
extract_used_apis,
extract_used_services,
)
18 changes: 2 additions & 16 deletions capa/features/extractors/cape/thread.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from typing import Iterator

from capa.features.address import DynamicCallAddress
from capa.features.extractors.helpers import is_aw_function
from capa.features.extractors.helpers import generate_symbols
from capa.features.extractors.cape.models import Process
from capa.features.extractors.base_extractor import CallHandle, ThreadHandle, ProcessHandle

Expand All @@ -25,22 +25,8 @@ def get_calls(ph: ProcessHandle, th: ThreadHandle) -> Iterator[CallHandle]:
if call.thread_id != tid:
continue

for symbol in generate_symbols(call.api):
for symbol in generate_symbols("", call.api):
call.api = symbol

addr = DynamicCallAddress(thread=th.address, id=call_index)
yield CallHandle(address=addr, inner=call)


def generate_symbols(symbol: str) -> Iterator[str]:
    """
    yield the name variants under which an API symbol should be matched.

    the symbol itself always comes first; when `is_aw_function` recognizes it,
    the name with its final character dropped follows.
    we over-generate on purpose so that rule matching is easier.
    """
    # as given, e.g. CreateFileA
    yield symbol

    if not is_aw_function(symbol):
        return

    # suffix character removed, e.g. CreateFile
    yield symbol[:-1]
2 changes: 1 addition & 1 deletion capa/features/extractors/dotnetfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def extract_file_import_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Impor

for imp in get_dotnet_unmanaged_imports(pe):
# like kernel32.CreateFileA
for name in capa.features.extractors.helpers.generate_symbols(imp.module, imp.method):
for name in capa.features.extractors.helpers.generate_symbols(imp.module, imp.method, include_dll=True):
yield Import(name), DNTokenAddress(imp.token)


Expand Down
2 changes: 1 addition & 1 deletion capa/features/extractors/ghidra/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ def extract_file_import_names() -> Iterator[Tuple[Feature, Address]]:
if "Ordinal_" in fstr[1]:
fstr[1] = f"#{fstr[1].split('_')[1]}"

for name in capa.features.extractors.helpers.generate_symbols(fstr[0][:-4], fstr[1]):
for name in capa.features.extractors.helpers.generate_symbols(fstr[0][:-4], fstr[1], include_dll=True):
yield Import(name), AbsoluteVirtualAddress(addr)


Expand Down
2 changes: 1 addition & 1 deletion capa/features/extractors/ghidra/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def get_file_imports() -> Dict[int, List[str]]:
# <EXTERNAL> mostly shows up in ELF files, otherwise, strip '.dll' w/ [:-4]
fstr[0] = "*" if "<EXTERNAL>" in fstr[0] else fstr[0][:-4]

for name in capa.features.extractors.helpers.generate_symbols(fstr[0], fstr[1]):
for name in capa.features.extractors.helpers.generate_symbols(fstr[0], fstr[1], include_dll=True):
import_dict.setdefault(addr, []).append(name)
if ex_loc:
import_dict.setdefault(ex_loc.getOffset(), []).append(name)
Expand Down
27 changes: 17 additions & 10 deletions capa/features/extractors/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,15 +41,20 @@ def is_ordinal(symbol: str) -> bool:
return False


def generate_symbols(dll: str, symbol: str) -> Iterator[str]:
def generate_symbols(dll: str, symbol: str, include_dll=False) -> Iterator[str]:
"""
for a given dll and symbol name, generate variants.
we over-generate features to make matching easier.
these include:
- kernel32.CreateFileA
- kernel32.CreateFile
- CreateFileA
- CreateFile
- ws2_32.#1

note that since capa v7 only `import` features include DLL names:
- kernel32.CreateFileA
- kernel32.CreateFile

for `api` features dll names are good for documentation but not used during matching
"""
# normalize dll name
dll = dll.lower()
Expand All @@ -58,25 +63,27 @@ def generate_symbols(dll: str, symbol: str) -> Iterator[str]:
dll = dll[0:-4] if dll.endswith(".dll") else dll
dll = dll[0:-4] if dll.endswith(".drv") else dll

# kernel32.CreateFileA
yield f"{dll}.{symbol}"
if include_dll:
# ws2_32.#1
# kernel32.CreateFileA
yield f"{dll}.{symbol}"

if not is_ordinal(symbol):
# CreateFileA
yield symbol

if is_aw_function(symbol):
# kernel32.CreateFile
yield f"{dll}.{symbol[:-1]}"
if include_dll:
# kernel32.CreateFile
yield f"{dll}.{symbol[:-1]}"

if not is_ordinal(symbol):
if is_aw_function(symbol):
# CreateFile
yield symbol[:-1]


def reformat_forwarded_export_name(forwarded_name: str) -> str:
"""
a forwarded export has a DLL name/path an symbol name.
a forwarded export has a DLL name/path and symbol name.
we want the former to be lowercase, and the latter to be verbatim.
"""

Expand Down
4 changes: 2 additions & 2 deletions capa/features/extractors/ida/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def extract_file_import_names() -> Iterator[Tuple[Feature, Address]]:
if info[1] and info[2]:
# e.g. in mimikatz: ('cabinet', 'FCIAddFile', 11L)
# extract by name here and by ordinal below
for name in capa.features.extractors.helpers.generate_symbols(info[0], info[1]):
for name in capa.features.extractors.helpers.generate_symbols(info[0], info[1], include_dll=True):
yield Import(name), addr
dll = info[0]
symbol = f"#{info[2]}"
Expand All @@ -123,7 +123,7 @@ def extract_file_import_names() -> Iterator[Tuple[Feature, Address]]:
else:
continue

for name in capa.features.extractors.helpers.generate_symbols(dll, symbol):
for name in capa.features.extractors.helpers.generate_symbols(dll, symbol, include_dll=True):
yield Import(name), addr

for ea, info in capa.features.extractors.ida.helpers.get_file_externs().items():
Expand Down
2 changes: 1 addition & 1 deletion capa/features/extractors/pefile.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def extract_file_import_names(pe, **kwargs):
except UnicodeDecodeError:
continue

for name in capa.features.extractors.helpers.generate_symbols(modname, impname):
for name in capa.features.extractors.helpers.generate_symbols(modname, impname, include_dll=True):
yield Import(name), AbsoluteVirtualAddress(imp.address)


Expand Down
2 changes: 1 addition & 1 deletion capa/features/extractors/viv/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def extract_file_import_names(vw, **kwargs) -> Iterator[Tuple[Feature, Address]]
impname = "#" + impname[len("ord") :]

addr = AbsoluteVirtualAddress(va)
for name in capa.features.extractors.helpers.generate_symbols(modname, impname):
for name in capa.features.extractors.helpers.generate_symbols(modname, impname, include_dll=True):
yield Import(name), addr


Expand Down
15 changes: 15 additions & 0 deletions capa/rules/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -595,6 +595,13 @@ def pop_statement_description_entry(d):
return description["description"]


def trim_dll_part(api: str) -> str:
    """
    strip a leading DLL name from an API name written in a rule.

    a rule that says `kernel32.CreateFileA` is normalized to `CreateFileA`.

    the name is returned unchanged when:
      - it references an ordinal, like `ws2_32.#1`:
        an ordinal is only meaningful together with its DLL.
      - it is a .NET-style name, like `System.Convert::FromBase64String`:
        the dot separates namespace and class, not DLL and symbol.
      - it does not contain exactly one dot.
    """
    # ws2_32.#1
    if ".#" in api:
        return api

    # System.Convert::FromBase64String
    if "::" in api:
        return api

    # kernel32.CreateFileA
    if api.count(".") == 1:
        return api.split(".")[1]

    return api


def build_statements(d, scopes: Scopes):
if len(d.keys()) > 2:
raise InvalidRule("too many statements")
Expand Down Expand Up @@ -722,6 +729,10 @@ def build_statements(d, scopes: Scopes):
# count(number(0x100 = description))
if term != "string":
value, description = parse_description(arg, term)

if term == "api":
value = trim_dll_part(value)

feature = Feature(value, description=description)
else:
# arg is string (which doesn't support inline descriptions), like:
Expand Down Expand Up @@ -816,6 +827,10 @@ def build_statements(d, scopes: Scopes):
else:
Feature = parse_feature(key)
value, description = parse_description(d[key], key, d.get("description"))

if key == "api":
value = trim_dll_part(value)

try:
feature = Feature(value, description=description)
except ValueError as e:
Expand Down
31 changes: 20 additions & 11 deletions tests/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -779,6 +779,7 @@ def parametrize(params, values, **kwargs):
("mimikatz", "file", capa.features.file.Import("advapi32.CryptSetHashParam"), True),
("mimikatz", "file", capa.features.file.Import("CryptSetHashParam"), True),
("mimikatz", "file", capa.features.file.Import("kernel32.IsWow64Process"), True),
("mimikatz", "file", capa.features.file.Import("IsWow64Process"), True),
("mimikatz", "file", capa.features.file.Import("msvcrt.exit"), True),
("mimikatz", "file", capa.features.file.Import("cabinet.#11"), True),
("mimikatz", "file", capa.features.file.Import("#11"), False),
Expand Down Expand Up @@ -859,11 +860,12 @@ def parametrize(params, values, **kwargs):
# .text:004018C0 8D 4B 02 lea ecx, [ebx+2]
("mimikatz", "function=0x401873,bb=0x4018B2,insn=0x4018C0", capa.features.insn.Number(0x2), True),
# insn/api
("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptAcquireContextW"), True),
("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptAcquireContext"), True),
("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptGenKey"), True),
("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptImportKey"), True),
("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptDestroyKey"), True),
# not extracting dll anymore
("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptAcquireContextW"), False),
("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptAcquireContext"), False),
("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptGenKey"), False),
("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptImportKey"), False),
("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptDestroyKey"), False),
("mimikatz", "function=0x403BAC", capa.features.insn.API("CryptAcquireContextW"), True),
("mimikatz", "function=0x403BAC", capa.features.insn.API("CryptAcquireContext"), True),
("mimikatz", "function=0x403BAC", capa.features.insn.API("CryptGenKey"), True),
Expand All @@ -872,7 +874,8 @@ def parametrize(params, values, **kwargs):
("mimikatz", "function=0x403BAC", capa.features.insn.API("Nope"), False),
("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.Nope"), False),
# insn/api: thunk
("mimikatz", "function=0x4556E5", capa.features.insn.API("advapi32.LsaQueryInformationPolicy"), True),
# not extracting dll anymore
("mimikatz", "function=0x4556E5", capa.features.insn.API("advapi32.LsaQueryInformationPolicy"), False),
("mimikatz", "function=0x4556E5", capa.features.insn.API("LsaQueryInformationPolicy"), True),
# insn/api: x64
(
Expand All @@ -896,10 +899,15 @@ def parametrize(params, values, **kwargs):
("mimikatz", "function=0x40B3C6", capa.features.insn.API("LocalFree"), True),
("c91887...", "function=0x40156F", capa.features.insn.API("CloseClipboard"), True),
# insn/api: resolve indirect calls
("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.CreatePipe"), True),
("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.SetHandleInformation"), True),
("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.CloseHandle"), True),
("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.WriteFile"), True),
# not extracting dll anymore
("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.CreatePipe"), False),
("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.SetHandleInformation"), False),
("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.CloseHandle"), False),
("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.WriteFile"), False),
("c91887...", "function=0x401A77", capa.features.insn.API("CreatePipe"), True),
("c91887...", "function=0x401A77", capa.features.insn.API("SetHandleInformation"), True),
("c91887...", "function=0x401A77", capa.features.insn.API("CloseHandle"), True),
("c91887...", "function=0x401A77", capa.features.insn.API("WriteFile"), True),
# insn/string
("mimikatz", "function=0x40105D", capa.features.common.String("SCardControl"), True),
("mimikatz", "function=0x40105D", capa.features.common.String("SCardTransmit"), True),
Expand Down Expand Up @@ -1074,7 +1082,8 @@ def parametrize(params, values, **kwargs):
("_1c444", "file", capa.features.file.Import("CreateCompatibleBitmap"), True),
("_1c444", "file", capa.features.file.Import("gdi32::CreateCompatibleBitmap"), False),
("_1c444", "function=0x1F68", capa.features.insn.API("GetWindowDC"), True),
("_1c444", "function=0x1F68", capa.features.insn.API("user32.GetWindowDC"), True),
# not extracting dll anymore
("_1c444", "function=0x1F68", capa.features.insn.API("user32.GetWindowDC"), False),
("_1c444", "function=0x1F68", capa.features.insn.Number(0xCC0020), True),
("_1c444", "token=0x600001D", capa.features.common.Characteristic("calls to"), True),
("_1c444", "token=0x6000018", capa.features.common.Characteristic("calls to"), False),
Expand Down
24 changes: 23 additions & 1 deletion tests/test_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import capa.features.address
from capa.engine import Or
from capa.features.file import FunctionName
from capa.features.insn import Number, Offset, Property
from capa.features.insn import API, Number, Offset, Property
from capa.features.common import (
OS,
OS_LINUX,
Expand Down Expand Up @@ -937,6 +937,28 @@ def test_count_number_symbol():
assert bool(r.evaluate({Number(0x100, description="symbol name"): {ADDR1, ADDR2, ADDR3}})) is True


def test_count_api():
# the rule below names the API together with its DLL inside a count() statement;
# the assertions check which API feature names then satisfy the rule.
rule = textwrap.dedent(
"""
rule:
meta:
name: test rule
scopes:
static: function
dynamic: thread
features:
- or:
- count(api(kernel32.CreateFileA)): 1
"""
)
r = capa.rules.Rule.from_yaml(rule)
# apis including their DLL names are not extracted anymore
# (these two feature sets carry no locations, so the count of 1 cannot be met)
assert bool(r.evaluate({API("kernel32.CreateFileA"): set()})) is False
assert bool(r.evaluate({API("kernel32.CreateFile"): set()})) is False
# the name without the trailing `A` is a different feature and must not match
assert bool(r.evaluate({API("CreateFile"): {ADDR1}})) is False
# the bare symbol name, seen at one address, satisfies the rule
assert bool(r.evaluate({API("CreateFileA"): {ADDR1}})) is True


def test_invalid_number():
with pytest.raises(capa.rules.InvalidRule):
_ = capa.rules.Rule.from_yaml(
Expand Down
Loading