From 5fbc191bfc8e052f2c0fdf3a7db8d7ab987331a1 Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Wed, 4 May 2022 16:51:38 -0600 Subject: [PATCH 01/10] dotnet: extract file function names --- capa/features/extractors/dnfile/file.py | 10 ++++-- capa/features/extractors/dnfile/helpers.py | 4 +++ capa/features/extractors/dotnetfile.py | 39 ++++++++++++++++++++-- tests/fixtures.py | 3 ++ 4 files changed, 50 insertions(+), 6 deletions(-) diff --git a/capa/features/extractors/dnfile/file.py b/capa/features/extractors/dnfile/file.py index 99e2643ce..78d13602e 100644 --- a/capa/features/extractors/dnfile/file.py +++ b/capa/features/extractors/dnfile/file.py @@ -13,19 +13,23 @@ if TYPE_CHECKING: import dnfile from capa.features.common import Feature, Format - from capa.features.file import Import + from capa.features.file import Import, FunctionName import capa.features.extractors def extract_file_import_names(pe: dnfile.dnPE) -> Iterator[Tuple[Import, int]]: - yield from capa.features.extractors.dotnetfile.extract_file_import_names(pe) + yield from capa.features.extractors.dotnetfile.extract_file_import_names(pe=pe) def extract_file_format(pe: dnfile.dnPE) -> Iterator[Tuple[Format, int]]: yield from capa.features.extractors.dotnetfile.extract_file_format(pe=pe) +def extract_file_function_names(pe: dnfile.dnPE) -> Iterator[Tuple[FunctionName, int]]: + yield from capa.features.extractors.dotnetfile.extract_file_function_names(pe=pe) + + def extract_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, int]]: for file_handler in FILE_HANDLERS: for (feature, token) in file_handler(pe): @@ -35,6 +39,6 @@ def extract_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, int]]: FILE_HANDLERS = ( extract_file_import_names, # TODO extract_file_strings, - # TODO extract_file_function_names, + extract_file_function_names, extract_file_format, ) diff --git a/capa/features/extractors/dnfile/helpers.py b/capa/features/extractors/dnfile/helpers.py index c73044622..9e44976ea 100644 --- 
a/capa/features/extractors/dnfile/helpers.py +++ b/capa/features/extractors/dnfile/helpers.py @@ -167,3 +167,7 @@ def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[CilMethodBody] continue yield body + + +def is_dotnet_table_valid(pe: dnfile.dnPE, table_name: str) -> bool: + return getattr(pe.net.mdtables, table_name, None) is not None diff --git a/capa/features/extractors/dotnetfile.py b/capa/features/extractors/dotnetfile.py index a9a2c6001..f99c665ef 100644 --- a/capa/features/extractors/dotnetfile.py +++ b/capa/features/extractors/dotnetfile.py @@ -6,10 +6,15 @@ import pefile import capa.features.extractors.helpers -from capa.features.file import Import +from capa.features.file import Import, FunctionName from capa.features.common import OS, OS_ANY, ARCH_ANY, ARCH_I386, ARCH_AMD64, FORMAT_DOTNET, Arch, Format, Feature from capa.features.extractors.base_extractor import FeatureExtractor -from capa.features.extractors.dnfile.helpers import get_dotnet_managed_imports, get_dotnet_unmanaged_imports +from capa.features.extractors.dnfile.helpers import ( + is_dotnet_table_valid, + get_dotnet_managed_imports, + calculate_dotnet_token_value, + get_dotnet_unmanaged_imports, +) logger = logging.getLogger(__name__) @@ -30,6 +35,34 @@ def extract_file_import_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Impor yield Import(symbol_variant), token +def extract_file_function_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[FunctionName, int]]: + """ + see https://www.ntcore.com/files/dotnetformat.htm + + 02 - TypeDef Table + Each row represents a class in the current assembly. 
+ TypeName (index into String heap) + TypeNamespace (index into String heap) + MethodList (index into MethodDef table; it marks the first of a continguous run of Methods owned by this Type) + """ + if not is_dotnet_table_valid(pe, "TypeDef"): + return + + for row in pe.net.mdtables.TypeDef.rows: + for index in row.MethodList: + # like File::OpenRead + name = f"{row.TypeName}::{index.row.Name}" + + # ECMA II.22.37: TypeNamespace can be null or non-null + if row.TypeNamespace: + # like System.IO.File::OpenRead + name = f"{row.TypeNamespace}.{name}" + + token = calculate_dotnet_token_value(index.table.number, index.row_index) + + yield FunctionName(name), token + + def extract_file_os(**kwargs) -> Iterator[Tuple[OS, int]]: yield OS(OS_ANY), 0x0 @@ -54,7 +87,7 @@ def extract_file_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, int]]: FILE_HANDLERS = ( extract_file_import_names, # TODO extract_file_strings, - # TODO extract_file_function_names, + extract_file_function_names, extract_file_format, ) diff --git a/tests/fixtures.py b/tests/fixtures.py index 7f5abc732..8959be373 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -671,6 +671,9 @@ def parametrize(params, values, **kwargs): ("mixed-mode-64", "file", Arch(ARCH_I386), False), ("b9f5b", "file", OS(OS_ANY), True), ("b9f5b", "file", Format(FORMAT_DOTNET), True), + ("hello-world", "file", capa.features.file.FunctionName("HelloWorld::Main"), True), + ("hello-world", "file", capa.features.file.FunctionName("HelloWorld::.ctor"), True), + ("hello-world", "file", capa.features.file.FunctionName("HelloWorld::.cctor"), False), ("hello-world", "function=0x250", capa.features.common.String("Hello World!"), True), ("hello-world", "function=0x250, bb=0x250, insn=0x252", capa.features.common.String("Hello World!"), True), ("hello-world", "function=0x250", capa.features.insn.API("System.Console::WriteLine"), True), From 7f28c5020c09d32d334c1ecde4cefe15c58d2752 Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Thu, 5 
May 2022 12:05:38 -0600 Subject: [PATCH 02/10] emit file function names as API features --- capa/features/extractors/dnfile/helpers.py | 29 ++++++++++++++++++++++ capa/features/extractors/dnfile/insn.py | 26 +++++++++++++++---- capa/features/extractors/dotnetfile.py | 29 +++------------------- 3 files changed, 53 insertions(+), 31 deletions(-) diff --git a/capa/features/extractors/dnfile/helpers.py b/capa/features/extractors/dnfile/helpers.py index 9e44976ea..b4e9608f0 100644 --- a/capa/features/extractors/dnfile/helpers.py +++ b/capa/features/extractors/dnfile/helpers.py @@ -171,3 +171,32 @@ def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[CilMethodBody] def is_dotnet_table_valid(pe: dnfile.dnPE, table_name: str) -> bool: return getattr(pe.net.mdtables, table_name, None) is not None + + +def get_dotnet_managed_method_names(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]: + """get managed method names from TypeDef table + + see https://www.ntcore.com/files/dotnetformat.htm + + 02 - TypeDef Table + Each row represents a class in the current assembly. 
+ TypeName (index into String heap) + TypeNamespace (index into String heap) + MethodList (index into MethodDef table; it marks the first of a contiguous run of Methods owned by this Type) + """ + if not is_dotnet_table_valid(pe, "TypeDef"): + return + + for row in pe.net.mdtables.TypeDef.rows: + for index in row.MethodList: + # like File::OpenRead + name = f"{row.TypeName}::{index.row.Name}" + + # ECMA II.22.37: TypeNamespace can be null or non-null + if row.TypeNamespace: + # like System.IO.File::OpenRead + name = f"{row.TypeNamespace}.{name}" + + token = calculate_dotnet_token_value(index.table.number, index.row_index) + + yield token, name diff --git a/capa/features/extractors/dnfile/insn.py b/capa/features/extractors/dnfile/insn.py index 262b97798..e876820f1 100644 --- a/capa/features/extractors/dnfile/insn.py +++ b/capa/features/extractors/dnfile/insn.py @@ -9,7 +9,6 @@ from __future__ import annotations from typing import TYPE_CHECKING, Dict, Tuple, Iterator, Optional -from itertools import chain if TYPE_CHECKING: from dncil.cil.instruction import Instruction @@ -26,24 +25,41 @@ read_dotnet_user_string, get_dotnet_managed_imports, get_dotnet_unmanaged_imports, + get_dotnet_managed_method_names, ) def get_imports(ctx: Dict) -> Dict: if "imports_cache" not in ctx: - ctx["imports_cache"] = { - token: imp - for (token, imp) in chain(get_dotnet_managed_imports(ctx["pe"]), get_dotnet_unmanaged_imports(ctx["pe"])) - } + ctx["imports_cache"] = {} + + for (token, name) in get_dotnet_managed_imports(ctx["pe"]): + ctx["imports_cache"][token] = name + for (token, name) in get_dotnet_unmanaged_imports(ctx["pe"]): + ctx["imports_cache"][token] = name + return ctx["imports_cache"] +def get_methods(ctx: Dict) -> Dict: + if "methods_cache" not in ctx: + ctx["methods_cache"] = {} + + for (token, name) in get_dotnet_managed_method_names(ctx["pe"]): + ctx["methods_cache"][token] = name + + return ctx["methods_cache"] + + def extract_insn_api_features(f: CilMethodBody, bb: 
CilMethodBody, insn: Instruction) -> Iterator[Tuple[API, int]]: """parse instruction API features""" if insn.opcode not in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli): return name: str = get_imports(f.ctx).get(insn.operand.value, "") + if not name: + name = get_methods(f.ctx).get(insn.operand.value, "") + if not name: return diff --git a/capa/features/extractors/dotnetfile.py b/capa/features/extractors/dotnetfile.py index f99c665ef..6e7a4b952 100644 --- a/capa/features/extractors/dotnetfile.py +++ b/capa/features/extractors/dotnetfile.py @@ -10,10 +10,10 @@ from capa.features.common import OS, OS_ANY, ARCH_ANY, ARCH_I386, ARCH_AMD64, FORMAT_DOTNET, Arch, Format, Feature from capa.features.extractors.base_extractor import FeatureExtractor from capa.features.extractors.dnfile.helpers import ( - is_dotnet_table_valid, get_dotnet_managed_imports, calculate_dotnet_token_value, get_dotnet_unmanaged_imports, + get_dotnet_managed_method_names, ) logger = logging.getLogger(__name__) @@ -36,31 +36,8 @@ def extract_file_import_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Impor def extract_file_function_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[FunctionName, int]]: - """ - see https://www.ntcore.com/files/dotnetformat.htm - - 02 - TypeDef Table - Each row represents a class in the current assembly. 
- TypeName (index into String heap) - TypeNamespace (index into String heap) - MethodList (index into MethodDef table; it marks the first of a continguous run of Methods owned by this Type) - """ - if not is_dotnet_table_valid(pe, "TypeDef"): - return - - for row in pe.net.mdtables.TypeDef.rows: - for index in row.MethodList: - # like File::OpenRead - name = f"{row.TypeName}::{index.row.Name}" - - # ECMA II.22.37: TypeNamespace can be null or non-null - if row.TypeNamespace: - # like System.IO.File::OpenRead - name = f"{row.TypeNamespace}.{name}" - - token = calculate_dotnet_token_value(index.table.number, index.row_index) - - yield FunctionName(name), token + for (token, name) in get_dotnet_managed_method_names(pe): + yield FunctionName(name), token def extract_file_os(**kwargs) -> Iterator[Tuple[OS, int]]: From cfd5fd311eac8ef0a33686819ec14946c673618b Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Thu, 5 May 2022 12:11:28 -0600 Subject: [PATCH 03/10] refactor file import name extraction --- capa/features/extractors/dotnetfile.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/capa/features/extractors/dotnetfile.py b/capa/features/extractors/dotnetfile.py index 6e7a4b952..8ee8d01fc 100644 --- a/capa/features/extractors/dotnetfile.py +++ b/capa/features/extractors/dotnetfile.py @@ -24,15 +24,15 @@ def extract_file_format(**kwargs) -> Iterator[Tuple[Format, int]]: def extract_file_import_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Import, int]]: - for (token, imp) in chain(get_dotnet_managed_imports(pe), get_dotnet_unmanaged_imports(pe)): - if "::" in imp: - # like System.IO.File::OpenRead - yield Import(imp), token - else: - # like kernel32.CreateFileA - dll, _, symbol = imp.rpartition(".") - for symbol_variant in capa.features.extractors.helpers.generate_symbols(dll, symbol): - yield Import(symbol_variant), token + for (token, name) in get_dotnet_managed_imports(pe): + # like System.IO.File::OpenRead + yield 
Import(name), token + + for (token, name) in get_dotnet_unmanaged_imports(pe): + # like kernel32.CreateFileA + dll, _, symbol = name.rpartition(".") + for name_variant in capa.features.extractors.helpers.generate_symbols(dll, symbol): + yield Import(name_variant), token def extract_file_function_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[FunctionName, int]]: From a01f80853ee468f348912d536119e3cceea8d726 Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Thu, 5 May 2022 12:18:25 -0600 Subject: [PATCH 04/10] fix formatting --- capa/features/extractors/dnfile/insn.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/capa/features/extractors/dnfile/insn.py b/capa/features/extractors/dnfile/insn.py index e876820f1..15a75ae06 100644 --- a/capa/features/extractors/dnfile/insn.py +++ b/capa/features/extractors/dnfile/insn.py @@ -37,7 +37,7 @@ def get_imports(ctx: Dict) -> Dict: ctx["imports_cache"][token] = name for (token, name) in get_dotnet_unmanaged_imports(ctx["pe"]): ctx["imports_cache"][token] = name - + return ctx["imports_cache"] @@ -47,7 +47,7 @@ def get_methods(ctx: Dict) -> Dict: for (token, name) in get_dotnet_managed_method_names(ctx["pe"]): ctx["methods_cache"][token] = name - + return ctx["methods_cache"] From 4a56f846102c5d46edfe85056f0d999a0a56d739 Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Thu, 5 May 2022 12:33:38 -0600 Subject: [PATCH 05/10] core refactor, improve file import name extraction --- capa/features/extractors/dnfile/helpers.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/capa/features/extractors/dnfile/helpers.py b/capa/features/extractors/dnfile/helpers.py index b4e9608f0..dd694d6d3 100644 --- a/capa/features/extractors/dnfile/helpers.py +++ b/capa/features/extractors/dnfile/helpers.py @@ -112,11 +112,17 @@ def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]: if not isinstance(row.Class.row, (dnfile.mdtable.TypeRefRow,)): continue - token: 
int = calculate_dotnet_token_value(dnfile.enums.MetadataTables.MemberRef.value, rid + 1) - # like System.IO.File::OpenRead - imp: str = f"{row.Class.row.TypeNamespace}.{row.Class.row.TypeName}::{row.Name}" + # like File::OpenRead + name = f"{row.Class.row.TypeName}::{row.Name}" - yield token, imp + # ECMA II.22.38: TypeNamespace can be null or non-null + if row.Class.row.TypeNamespace: + # like System.IO.File::OpenRead + name = f"{row.Class.row.TypeNamespace}.{name}" + + token: int = calculate_dotnet_token_value(pe.net.mdtables.MemberRef.number, rid + 1) + + yield token, name def get_dotnet_unmanaged_imports(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]: @@ -147,9 +153,9 @@ def get_dotnet_unmanaged_imports(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]: dll = dll.split(".")[0] # like kernel32.CreateFileA - imp: str = f"{dll}.{symbol}" + name: str = f"{dll}.{symbol}" - yield token, imp + yield token, name def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[CilMethodBody]: @@ -187,7 +193,7 @@ def get_dotnet_managed_method_names(pe: dnfile.dnPE) -> Iterator[Tuple[int, str] if not is_dotnet_table_valid(pe, "TypeDef"): return - for row in pe.net.mdtables.TypeDef.rows: + for row in pe.net.mdtables.TypeDef: for index in row.MethodList: # like File::OpenRead name = f"{row.TypeName}::{index.row.Name}" From 20b9357a2cc392fdd699ecb4c39524efc8056b4e Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Thu, 5 May 2022 13:32:29 -0600 Subject: [PATCH 06/10] update dotnet meta table checks --- capa/features/extractors/dnfile/helpers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/capa/features/extractors/dnfile/helpers.py b/capa/features/extractors/dnfile/helpers.py index 189a39013..8a90c77a4 100644 --- a/capa/features/extractors/dnfile/helpers.py +++ b/capa/features/extractors/dnfile/helpers.py @@ -105,7 +105,7 @@ def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]: TypeName (index into String heap) TypeNamespace 
(index into String heap) """ - if not hasattr(pe.net.mdtables, "MemberRef") or pe.net.mdtables.MemberRef is None: + if not is_dotnet_table_valid(pe, "MemberRef"): return for (rid, row) in enumerate(pe.net.mdtables.MemberRef): @@ -136,7 +136,7 @@ def get_dotnet_unmanaged_imports(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]: ImportName (index into the String heap) ImportScope (index into the ModuleRef table) """ - if not hasattr(pe.net.mdtables, "ImplMap") or pe.net.mdtables.ImplMap is None: + if not is_dotnet_table_valid(pe, "ImplMap"): return for row in pe.net.mdtables.ImplMap: @@ -160,7 +160,7 @@ def get_dotnet_unmanaged_imports(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]: def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[CilMethodBody]: """get managed methods from MethodDef table""" - if not hasattr(pe.net.mdtables, "MethodDef") or pe.net.mdtables.MethodDef is None: + if not is_dotnet_table_valid(pe, "MethodDef"): return for row in pe.net.mdtables.MethodDef: From dfc2de5bdb6f76d3d8569ecfe1a9640137166cea Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Thu, 5 May 2022 13:40:09 -0600 Subject: [PATCH 07/10] remove unused imports --- capa/features/extractors/dotnetfile.py | 1 - 1 file changed, 1 deletion(-) diff --git a/capa/features/extractors/dotnetfile.py b/capa/features/extractors/dotnetfile.py index 8ee8d01fc..86014d047 100644 --- a/capa/features/extractors/dotnetfile.py +++ b/capa/features/extractors/dotnetfile.py @@ -1,6 +1,5 @@ import logging from typing import Tuple, Iterator -from itertools import chain import dnfile import pefile From 40f4c556924a864dab82ef43066bc9606362a3d6 Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Thu, 5 May 2022 13:45:58 -0600 Subject: [PATCH 08/10] fix formatting --- capa/features/extractors/dnfile/file.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/features/extractors/dnfile/file.py b/capa/features/extractors/dnfile/file.py index 1fe687e8a..4168249a0 100644 --- 
a/capa/features/extractors/dnfile/file.py +++ b/capa/features/extractors/dnfile/file.py @@ -29,7 +29,7 @@ def extract_file_format(pe: dnfile.dnPE) -> Iterator[Tuple[Format, int]]: def extract_file_function_names(pe: dnfile.dnPE) -> Iterator[Tuple[FunctionName, int]]: yield from capa.features.extractors.dotnetfile.extract_file_function_names(pe=pe) - + def extract_file_strings(pe: dnfile.dnPE) -> Iterator[Tuple[String, int]]: yield from capa.features.extractors.dotnetfile.extract_file_strings(pe=pe) From 47ae1996dca5cd5dcfffde66d0afda86e818c450 Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Thu, 5 May 2022 13:55:44 -0600 Subject: [PATCH 09/10] update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 15d566726..aba769516 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ - extract additional offset/number features in certain circumstances #320 @williballenthin - add detection and basic feature extraction for dotnet #987 @mr-tz, @mike-hunhoff, @williballenthin - add file string extraction for dotnet files #1012 @mike-hunhoff + - add file function-name extraction for dotnet files #1015 @mike-hunhoff ### Breaking Changes From d5e691b7146e6d66322fa834b0aa5a4fa27fcc50 Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Fri, 6 May 2022 07:58:44 -0600 Subject: [PATCH 10/10] pr review changes --- capa/features/extractors/dnfile/helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/features/extractors/dnfile/helpers.py b/capa/features/extractors/dnfile/helpers.py index 8a90c77a4..ddca5e616 100644 --- a/capa/features/extractors/dnfile/helpers.py +++ b/capa/features/extractors/dnfile/helpers.py @@ -176,7 +176,7 @@ def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[CilMethodBody] def is_dotnet_table_valid(pe: dnfile.dnPE, table_name: str) -> bool: - return getattr(pe.net.mdtables, table_name, None) is not None + return bool(getattr(pe.net.mdtables, table_name, 
None)) def get_dotnet_managed_method_names(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]: