Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

dotnet: extract file function names #1015

Merged
merged 12 commits into from
May 6, 2022
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
- extract additional offset/number features in certain circumstances #320 @williballenthin
- add detection and basic feature extraction for dotnet #987 @mr-tz, @mike-hunhoff, @williballenthin
- add file string extraction for dotnet files #1012 @mike-hunhoff
- add file function-name extraction for dotnet files #1015 @mike-hunhoff

### Breaking Changes

Expand Down
10 changes: 7 additions & 3 deletions capa/features/extractors/dnfile/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,23 @@
if TYPE_CHECKING:
import dnfile
from capa.features.common import Feature, Format, String
from capa.features.file import Import
from capa.features.file import Import, FunctionName

import capa.features.extractors


def extract_file_import_names(pe: dnfile.dnPE) -> Iterator[Tuple[Import, int]]:
    """Yield the import features of a .NET file.

    Thin wrapper that delegates to
    capa.features.extractors.dotnetfile.extract_file_import_names.

    Args:
        pe: parsed .NET PE file.

    Yields:
        whatever the dotnetfile extractor yields, unchanged.
    """
    # delegate exactly once, passing `pe` by keyword like the sibling wrappers;
    # delegating twice would emit every import feature in duplicate
    yield from capa.features.extractors.dotnetfile.extract_file_import_names(pe=pe)


def extract_file_format(pe: dnfile.dnPE) -> Iterator[Tuple[Format, int]]:
    """Yield the file format features of a .NET file.

    Thin wrapper that delegates to
    capa.features.extractors.dotnetfile.extract_file_format, passing `pe` by keyword.
    """
    delegate = capa.features.extractors.dotnetfile.extract_file_format
    yield from delegate(pe=pe)


def extract_file_function_names(pe: dnfile.dnPE) -> Iterator[Tuple[FunctionName, int]]:
    """Yield the function-name features of a .NET file.

    Thin wrapper that delegates to
    capa.features.extractors.dotnetfile.extract_file_function_names, passing `pe` by keyword.
    """
    delegate = capa.features.extractors.dotnetfile.extract_file_function_names
    yield from delegate(pe=pe)


def extract_file_strings(pe: dnfile.dnPE) -> Iterator[Tuple[String, int]]:
    """Yield the string features of a .NET file.

    Thin wrapper that delegates to
    capa.features.extractors.dotnetfile.extract_file_strings, passing `pe` by keyword.
    """
    delegate = capa.features.extractors.dotnetfile.extract_file_strings
    yield from delegate(pe=pe)

Expand All @@ -38,7 +42,7 @@ def extract_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, int]]:

# file-scope feature handlers; function-name extraction is registered below
FILE_HANDLERS = (
extract_file_import_names,
extract_file_function_names,
extract_file_strings,
extract_file_format,
)
57 changes: 48 additions & 9 deletions capa/features/extractors/dnfile/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,18 +105,24 @@ def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]:
TypeName (index into String heap)
TypeNamespace (index into String heap)
"""
if not hasattr(pe.net.mdtables, "MemberRef") or pe.net.mdtables.MemberRef is None:
if not is_dotnet_table_valid(pe, "MemberRef"):
mike-hunhoff marked this conversation as resolved.
Show resolved Hide resolved
return

for (rid, row) in enumerate(pe.net.mdtables.MemberRef):
if not isinstance(row.Class.row, (dnfile.mdtable.TypeRefRow,)):
continue

token: int = calculate_dotnet_token_value(dnfile.enums.MetadataTables.MemberRef.value, rid + 1)
# like System.IO.File::OpenRead
imp: str = f"{row.Class.row.TypeNamespace}.{row.Class.row.TypeName}::{row.Name}"
# like File::OpenRead
name = f"{row.Class.row.TypeName}::{row.Name}"

yield token, imp
# ECMA II.22.38: TypeNamespace can be null or non-null
if row.Class.row.TypeNamespace:
# like System.IO.File::OpenRead
name = f"{row.Class.row.TypeNamespace}.{name}"

token: int = calculate_dotnet_token_value(pe.net.mdtables.MemberRef.number, rid + 1)

yield token, name


def get_dotnet_unmanaged_imports(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]:
Expand All @@ -130,7 +136,7 @@ def get_dotnet_unmanaged_imports(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]:
ImportName (index into the String heap)
ImportScope (index into the ModuleRef table)
"""
if not hasattr(pe.net.mdtables, "ImplMap") or pe.net.mdtables.ImplMap is None:
if not is_dotnet_table_valid(pe, "ImplMap"):
return

for row in pe.net.mdtables.ImplMap:
Expand All @@ -147,14 +153,14 @@ def get_dotnet_unmanaged_imports(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]:
dll = dll.split(".")[0]

# like kernel32.CreateFileA
imp: str = f"{dll}.{symbol}"
name: str = f"{dll}.{symbol}"

yield token, imp
yield token, name


def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[CilMethodBody]:
"""get managed methods from MethodDef table"""
if not hasattr(pe.net.mdtables, "MethodDef") or pe.net.mdtables.MethodDef is None:
if not is_dotnet_table_valid(pe, "MethodDef"):
return

for row in pe.net.mdtables.MethodDef:
Expand All @@ -167,3 +173,36 @@ def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[CilMethodBody]
continue

yield body


def is_dotnet_table_valid(pe: dnfile.dnPE, table_name: str) -> bool:
    """Report whether the named metadata table is available on `pe`.

    Args:
        pe: parsed .NET PE file; `pe.net.mdtables` is inspected.
        table_name: attribute name of the table, like "MemberRef" or "TypeDef".

    Returns:
        True when `pe.net.mdtables` has the attribute and it is not None,
        otherwise False.
    """
    # a missing attribute and an attribute set to None are treated the same
    table = getattr(pe.net.mdtables, table_name, None)
    return table is not None
mike-hunhoff marked this conversation as resolved.
Show resolved Hide resolved


def get_dotnet_managed_method_names(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]:
    """get managed method names from TypeDef table

    see https://www.ntcore.com/files/dotnetformat.htm

    02 - TypeDef Table
        Each row represents a class in the current assembly.
            TypeName (index into String heap)
            TypeNamespace (index into String heap)
            MethodList (index into MethodDef table; it marks the first of a contiguous run of Methods owned by this Type)

    yields (token, name) pairs, one per entry of each TypeDef row's MethodList;
    yields nothing when the TypeDef table is not available.
    """
    if not is_dotnet_table_valid(pe, "TypeDef"):
        return

    for typedef in pe.net.mdtables.TypeDef:
        for method in typedef.MethodList:
            # like File::OpenRead
            qualified = f"{typedef.TypeName}::{method.row.Name}"

            # ECMA II.22.37: TypeNamespace can be null or non-null
            namespace = typedef.TypeNamespace
            if namespace:
                # like System.IO.File::OpenRead
                qualified = f"{namespace}.{qualified}"

            # token is derived from the referenced table's number and the method's row index
            yield calculate_dotnet_token_value(method.table.number, method.row_index), qualified
mike-hunhoff marked this conversation as resolved.
Show resolved Hide resolved
26 changes: 21 additions & 5 deletions capa/features/extractors/dnfile/insn.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from __future__ import annotations

from typing import TYPE_CHECKING, Dict, Tuple, Iterator, Optional
from itertools import chain

if TYPE_CHECKING:
from dncil.cil.instruction import Instruction
Expand All @@ -26,24 +25,41 @@
read_dotnet_user_string,
get_dotnet_managed_imports,
get_dotnet_unmanaged_imports,
get_dotnet_managed_method_names,
)


def get_imports(ctx: Dict) -> Dict:
    """Return the token -> import name mapping for the file, building it on first use.

    The mapping is cached in `ctx` under "imports_cache"; later calls return the
    cached dict without re-reading the metadata tables.

    Args:
        ctx: per-file context dict; `ctx["pe"]` is read when the cache must be built.

    Returns:
        dict mapping metadata token to import name, for both managed and
        unmanaged imports.
    """
    if "imports_cache" not in ctx:
        # build the cache exactly once; no dependency on itertools.chain
        ctx["imports_cache"] = {}

        for (token, name) in get_dotnet_managed_imports(ctx["pe"]):
            ctx["imports_cache"][token] = name
        for (token, name) in get_dotnet_unmanaged_imports(ctx["pe"]):
            ctx["imports_cache"][token] = name

    return ctx["imports_cache"]


def get_methods(ctx: Dict) -> Dict:
    """Return the token -> managed method name mapping, building it on first use.

    The mapping is cached in `ctx` under "methods_cache"; when the key is already
    present the cached dict is returned as-is. Otherwise it is populated from
    get_dotnet_managed_method_names(ctx["pe"]).
    """
    if "methods_cache" in ctx:
        return ctx["methods_cache"]

    # register the empty cache first, then fill it pair by pair
    cache = ctx["methods_cache"] = {}
    cache.update(get_dotnet_managed_method_names(ctx["pe"]))
    return cache


def extract_insn_api_features(f: CilMethodBody, bb: CilMethodBody, insn: Instruction) -> Iterator[Tuple[API, int]]:
"""parse instruction API features"""
if insn.opcode not in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli):
return

name: str = get_imports(f.ctx).get(insn.operand.value, "")
if not name:
name = get_methods(f.ctx).get(insn.operand.value, "")

if not name:
return

Expand Down
35 changes: 22 additions & 13 deletions capa/features/extractors/dotnetfile.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
import logging
from typing import Tuple, Iterator
from itertools import chain

import dnfile
import pefile

import capa.features.extractors.helpers
from capa.features.file import Import
from capa.features.file import Import, FunctionName
from capa.features.common import (
OS,
OS_ANY,
Expand All @@ -20,7 +19,12 @@
Feature,
)
from capa.features.extractors.base_extractor import FeatureExtractor
from capa.features.extractors.dnfile.helpers import get_dotnet_managed_imports, get_dotnet_unmanaged_imports
from capa.features.extractors.dnfile.helpers import (
get_dotnet_managed_imports,
calculate_dotnet_token_value,
get_dotnet_unmanaged_imports,
get_dotnet_managed_method_names,
)

logger = logging.getLogger(__name__)

Expand All @@ -30,15 +34,20 @@ def extract_file_format(**kwargs) -> Iterator[Tuple[Format, int]]:


def extract_file_import_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Import, int]]:
    """Yield Import features for the managed and unmanaged imports of a .NET file.

    Args:
        pe: parsed .NET PE file.
        **kwargs: accepted for handler-signature compatibility; unused here.

    Yields:
        (Import, token) pairs. Managed imports are emitted once under their
        name; unmanaged imports are emitted once per symbol variant produced
        by capa.features.extractors.helpers.generate_symbols.
    """
    # each import source is walked exactly once, so no feature is emitted twice
    for (token, name) in get_dotnet_managed_imports(pe):
        # like System.IO.File::OpenRead
        yield Import(name), token

    for (token, name) in get_dotnet_unmanaged_imports(pe):
        # like kernel32.CreateFileA
        dll, _, symbol = name.rpartition(".")
        for name_variant in capa.features.extractors.helpers.generate_symbols(dll, symbol):
            yield Import(name_variant), token


def extract_file_function_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[FunctionName, int]]:
    """Yield a (FunctionName, token) pair for every managed method name in the file.

    Names and tokens come from get_dotnet_managed_method_names(pe); `**kwargs`
    is accepted but not used.
    """
    yield from ((FunctionName(name), token) for (token, name) in get_dotnet_managed_method_names(pe))


def extract_file_os(**kwargs) -> Iterator[Tuple[OS, int]]:
Expand Down Expand Up @@ -68,8 +77,8 @@ def extract_file_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, int]]:

# file-scope feature handlers; function-name extraction is registered below
FILE_HANDLERS = (
extract_file_import_names,
extract_file_function_names,
extract_file_strings,
extract_file_format,
)

Expand Down
3 changes: 3 additions & 0 deletions tests/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -671,6 +671,9 @@ def parametrize(params, values, **kwargs):
("mixed-mode-64", "file", Arch(ARCH_I386), False),
("b9f5b", "file", OS(OS_ANY), True),
("b9f5b", "file", Format(FORMAT_DOTNET), True),
("hello-world", "file", capa.features.file.FunctionName("HelloWorld::Main"), True),
("hello-world", "file", capa.features.file.FunctionName("HelloWorld::.ctor"), True),
("hello-world", "file", capa.features.file.FunctionName("HelloWorld::.cctor"), False),
("hello-world", "file", capa.features.common.String("Hello World!"), True),
("hello-world", "function=0x250", capa.features.common.String("Hello World!"), True),
("hello-world", "function=0x250, bb=0x250, insn=0x252", capa.features.common.String("Hello World!"), True),
Expand Down