diff --git a/CHANGELOG.md b/CHANGELOG.md index 17eac2f52..115f2ea95 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ - protobuf: deprecate `RuleMetadata.scope` in favor of `RuleMetadata.scopes` @williballenthin - protobuf: deprecate `Metadata.analysis` in favor of `Metadata.analysis2` that is dynamic analysis aware @williballenthin - update freeze format to v3, adding support for dynamic analysis @williballenthin +- extractor: ignore DLL name for api features #1815 @mr-tz ### New Rules (19) diff --git a/capa/features/extractors/binja/file.py b/capa/features/extractors/binja/file.py index 84b25348b..0054e62b1 100644 --- a/capa/features/extractors/binja/file.py +++ b/capa/features/extractors/binja/file.py @@ -115,13 +115,13 @@ def extract_file_import_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address for sym in bv.get_symbols_of_type(SymbolType.ImportAddressSymbol): lib_name = str(sym.namespace) addr = AbsoluteVirtualAddress(sym.address) - for name in capa.features.extractors.helpers.generate_symbols(lib_name, sym.short_name): + for name in capa.features.extractors.helpers.generate_symbols(lib_name, sym.short_name, include_dll=True): yield Import(name), addr ordinal = sym.ordinal if ordinal != 0 and (lib_name != ""): ordinal_name = f"#{ordinal}" - for name in capa.features.extractors.helpers.generate_symbols(lib_name, ordinal_name): + for name in capa.features.extractors.helpers.generate_symbols(lib_name, ordinal_name, include_dll=True): yield Import(name), addr diff --git a/capa/features/extractors/cape/file.py b/capa/features/extractors/cape/file.py index 66ec8c4fb..3143504c0 100644 --- a/capa/features/extractors/cape/file.py +++ b/capa/features/extractors/cape/file.py @@ -58,7 +58,7 @@ def extract_import_names(report: CapeReport) -> Iterator[Tuple[Feature, Address] if not function.name: continue - for name in generate_symbols(library.dll, function.name): + for name in generate_symbols(library.dll, function.name, include_dll=True): yield 
Import(name), AbsoluteVirtualAddress(function.address) @@ -126,6 +126,7 @@ def extract_features(report: CapeReport) -> Iterator[Tuple[Feature, Address]]: extract_used_regkeys, extract_used_files, extract_used_mutexes, + extract_used_commands, extract_used_apis, extract_used_services, ) diff --git a/capa/features/extractors/cape/thread.py b/capa/features/extractors/cape/thread.py index cfdb081cf..648b092ee 100644 --- a/capa/features/extractors/cape/thread.py +++ b/capa/features/extractors/cape/thread.py @@ -10,7 +10,7 @@ from typing import Iterator from capa.features.address import DynamicCallAddress -from capa.features.extractors.helpers import is_aw_function +from capa.features.extractors.helpers import generate_symbols from capa.features.extractors.cape.models import Process from capa.features.extractors.base_extractor import CallHandle, ThreadHandle, ProcessHandle @@ -25,22 +25,8 @@ def get_calls(ph: ProcessHandle, th: ThreadHandle) -> Iterator[CallHandle]: if call.thread_id != tid: continue - for symbol in generate_symbols(call.api): + for symbol in generate_symbols("", call.api): call.api = symbol addr = DynamicCallAddress(thread=th.address, id=call_index) yield CallHandle(address=addr, inner=call) - - -def generate_symbols(symbol: str) -> Iterator[str]: - """ - for a given symbol name, generate variants. - we over-generate features to make matching easier. 
def generate_symbols(dll: str, symbol: str, include_dll: bool = False) -> Iterator[str]:
    """
    for a given dll and symbol name, generate variants.
    we over-generate features to make matching easier.

    always generated:
      - CreateFileA
      - CreateFile   (only when `is_aw_function` recognizes the A/W suffix)
      - ws2_32.#1    (ordinals are never emitted without their DLL)

    note that since capa v7 only `import` features include DLL names,
    so these are generated only when `include_dll` is set:
      - kernel32.CreateFileA
      - kernel32.CreateFile

    for `api` features dll names are good for documentation but not used during matching

    args:
      dll: module name; lowercased here, a trailing `.dll`/`.drv` is dropped. may be empty.
      symbol: symbol name, or an ordinal as recognized by `is_ordinal`.
      include_dll: also yield the `dll.symbol` spellings of named symbols.

    yields:
      the variants, in the order: dll.symbol, symbol, dll.symbol-without-A/W, symbol-without-A/W.
    """
    # normalize dll name
    dll = dll.lower()

    # trim extensions to show the actual module name
    dll = dll[0:-4] if dll.endswith(".dll") else dll
    dll = dll[0:-4] if dll.endswith(".drv") else dll

    ordinal = is_ordinal(symbol)

    # an ordinal is only meaningful together with its DLL, so it keeps the
    # qualified spelling even when the caller did not ask for DLL names.
    # without a DLL name there is nothing sensible to emit for it.
    if include_dll or (ordinal and dll):
        # ws2_32.#1
        # kernel32.CreateFileA
        yield f"{dll}.{symbol}"

    if ordinal:
        # no bare spelling and no A/W variants for ordinals
        return

    # CreateFileA
    yield symbol

    # only strip the last character when it really is an A/W suffix,
    # otherwise we would emit truncated names like `kernel32.IsWow64Proces`
    if is_aw_function(symbol):
        if include_dll:
            # kernel32.CreateFile
            yield f"{dll}.{symbol[:-1]}"

        # CreateFile
        yield symbol[:-1]
in mimikatz: ('cabinet', 'FCIAddFile', 11L) # extract by name here and by ordinal below - for name in capa.features.extractors.helpers.generate_symbols(info[0], info[1]): + for name in capa.features.extractors.helpers.generate_symbols(info[0], info[1], include_dll=True): yield Import(name), addr dll = info[0] symbol = f"#{info[2]}" @@ -123,7 +123,7 @@ def extract_file_import_names() -> Iterator[Tuple[Feature, Address]]: else: continue - for name in capa.features.extractors.helpers.generate_symbols(dll, symbol): + for name in capa.features.extractors.helpers.generate_symbols(dll, symbol, include_dll=True): yield Import(name), addr for ea, info in capa.features.extractors.ida.helpers.get_file_externs().items(): diff --git a/capa/features/extractors/pefile.py b/capa/features/extractors/pefile.py index 55e0688ee..abd917c07 100644 --- a/capa/features/extractors/pefile.py +++ b/capa/features/extractors/pefile.py @@ -84,7 +84,7 @@ def extract_file_import_names(pe, **kwargs): except UnicodeDecodeError: continue - for name in capa.features.extractors.helpers.generate_symbols(modname, impname): + for name in capa.features.extractors.helpers.generate_symbols(modname, impname, include_dll=True): yield Import(name), AbsoluteVirtualAddress(imp.address) diff --git a/capa/features/extractors/viv/file.py b/capa/features/extractors/viv/file.py index 204d8e693..52d56accd 100644 --- a/capa/features/extractors/viv/file.py +++ b/capa/features/extractors/viv/file.py @@ -73,7 +73,7 @@ def extract_file_import_names(vw, **kwargs) -> Iterator[Tuple[Feature, Address]] impname = "#" + impname[len("ord") :] addr = AbsoluteVirtualAddress(va) - for name in capa.features.extractors.helpers.generate_symbols(modname, impname): + for name in capa.features.extractors.helpers.generate_symbols(modname, impname, include_dll=True): yield Import(name), addr diff --git a/capa/rules/__init__.py b/capa/rules/__init__.py index e715ae863..b41f259b7 100644 --- a/capa/rules/__init__.py +++ 
def trim_dll_part(api: str) -> str:
    """
    strip the leading DLL name from an `api` feature value written in a rule.

    api features are matched by symbol name only, so a rule may still spell
    `kernel32.CreateFileA` for documentation and it is reduced to `CreateFileA` here.

    left untouched:
      - names that do not contain exactly one dot, like `CreateFileA`
      - ordinals, like `ws2_32.#1`: a bare `#1` would be ambiguous across DLLs
      - names using `::`, like `System.Convert::FromBase64String`:
        the part before the dot is a namespace, not a DLL

    args:
      api: the api name as given in the rule.

    returns:
      the api name without its DLL prefix, or `api` unchanged when there is nothing to trim.
    """
    # ordinal imports, like ws2_32.#1, keep dll
    if ".#" in api:
        return api

    # System.Convert::FromBase64String
    if "::" in api:
        return api

    # kernel32.CreateFileA
    if api.count(".") == 1:
        return api.partition(".")[2]

    return api
capa.features.insn.API("advapi32.CryptAcquireContextW"), True), - ("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptAcquireContext"), True), - ("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptGenKey"), True), - ("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptImportKey"), True), - ("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptDestroyKey"), True), + # not extracting dll anymore + ("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptAcquireContextW"), False), + ("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptAcquireContext"), False), + ("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptGenKey"), False), + ("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptImportKey"), False), + ("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptDestroyKey"), False), ("mimikatz", "function=0x403BAC", capa.features.insn.API("CryptAcquireContextW"), True), ("mimikatz", "function=0x403BAC", capa.features.insn.API("CryptAcquireContext"), True), ("mimikatz", "function=0x403BAC", capa.features.insn.API("CryptGenKey"), True), @@ -872,7 +874,8 @@ def parametrize(params, values, **kwargs): ("mimikatz", "function=0x403BAC", capa.features.insn.API("Nope"), False), ("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.Nope"), False), # insn/api: thunk - ("mimikatz", "function=0x4556E5", capa.features.insn.API("advapi32.LsaQueryInformationPolicy"), True), + # not extracting dll anymore + ("mimikatz", "function=0x4556E5", capa.features.insn.API("advapi32.LsaQueryInformationPolicy"), False), ("mimikatz", "function=0x4556E5", capa.features.insn.API("LsaQueryInformationPolicy"), True), # insn/api: x64 ( @@ -896,10 +899,15 @@ def parametrize(params, values, **kwargs): ("mimikatz", "function=0x40B3C6", capa.features.insn.API("LocalFree"), True), ("c91887...", "function=0x40156F", 
capa.features.insn.API("CloseClipboard"), True), # insn/api: resolve indirect calls - ("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.CreatePipe"), True), - ("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.SetHandleInformation"), True), - ("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.CloseHandle"), True), - ("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.WriteFile"), True), + # not extracting dll anymore + ("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.CreatePipe"), False), + ("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.SetHandleInformation"), False), + ("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.CloseHandle"), False), + ("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.WriteFile"), False), + ("c91887...", "function=0x401A77", capa.features.insn.API("CreatePipe"), True), + ("c91887...", "function=0x401A77", capa.features.insn.API("SetHandleInformation"), True), + ("c91887...", "function=0x401A77", capa.features.insn.API("CloseHandle"), True), + ("c91887...", "function=0x401A77", capa.features.insn.API("WriteFile"), True), # insn/string ("mimikatz", "function=0x40105D", capa.features.common.String("SCardControl"), True), ("mimikatz", "function=0x40105D", capa.features.common.String("SCardTransmit"), True), @@ -1074,7 +1082,8 @@ def parametrize(params, values, **kwargs): ("_1c444", "file", capa.features.file.Import("CreateCompatibleBitmap"), True), ("_1c444", "file", capa.features.file.Import("gdi32::CreateCompatibleBitmap"), False), ("_1c444", "function=0x1F68", capa.features.insn.API("GetWindowDC"), True), - ("_1c444", "function=0x1F68", capa.features.insn.API("user32.GetWindowDC"), True), + # not extracting dll anymore + ("_1c444", "function=0x1F68", capa.features.insn.API("user32.GetWindowDC"), False), ("_1c444", "function=0x1F68", capa.features.insn.Number(0xCC0020), True), ("_1c444", 
def test_count_api():
    """a DLL-qualified `api` inside `count(...)` is matched by its bare symbol name only."""
    rule = textwrap.dedent(
        """
        rule:
            meta:
                name: test rule
                scopes:
                    static: function
                    dynamic: thread
            features:
                - or:
                    - count(api(kernel32.CreateFileA)): 1
        """
    )
    r = capa.rules.Rule.from_yaml(rule)
    # apis including their DLL names are not extracted anymore.
    # give them a real location so these checks cannot pass just because
    # the address set is empty.
    assert bool(r.evaluate({API("kernel32.CreateFileA"): {ADDR1}})) is False
    assert bool(r.evaluate({API("kernel32.CreateFile"): {ADDR1}})) is False
    # the rule names CreateFileA, so the A/W-stripped variant alone does not satisfy it
    assert bool(r.evaluate({API("CreateFile"): {ADDR1}})) is False
    assert bool(r.evaluate({API("CreateFileA"): {ADDR1}})) is True
    # no location at all never satisfies a count of 1
    assert bool(r.evaluate({API("CreateFileA"): set()})) is False