Skip to content

Commit

Permalink
dotnet: extract file function names (#1015)
Browse files Browse the repository at this point in the history
  • Loading branch information
mike-hunhoff authored May 6, 2022
1 parent ff28106 commit a9c9b3c
Show file tree
Hide file tree
Showing 6 changed files with 102 additions and 30 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
- extract additional offset/number features in certain circumstances #320 @williballenthin
- add detection and basic feature extraction for dotnet #987 @mr-tz, @mike-hunhoff, @williballenthin
- add file string extraction for dotnet files #1012 @mike-hunhoff
- add file function-name extraction for dotnet files #1015 @mike-hunhoff

### Breaking Changes

Expand Down
10 changes: 7 additions & 3 deletions capa/features/extractors/dnfile/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,23 @@
if TYPE_CHECKING:
import dnfile
from capa.features.common import Feature, Format, String
from capa.features.file import Import
from capa.features.file import Import, FunctionName

import capa.features.extractors


def extract_file_import_names(pe: dnfile.dnPE) -> Iterator[Tuple[Import, int]]:
    """extract file import names by delegating to the shared dotnetfile extractor

    args:
      pe: parsed .NET PE file

    yields:
      (Import, int) pairs exactly as produced by
      capa.features.extractors.dotnetfile.extract_file_import_names
    """
    # delegate exactly once, passing `pe` by keyword like the sibling wrappers do;
    # a second delegation would emit every import feature twice
    yield from capa.features.extractors.dotnetfile.extract_file_import_names(pe=pe)


def extract_file_format(pe: dnfile.dnPE) -> Iterator[Tuple[Format, int]]:
    """extract the file format feature by delegating to the shared dotnetfile extractor

    args:
      pe: parsed .NET PE file, forwarded by keyword

    yields:
      the items produced by capa.features.extractors.dotnetfile.extract_file_format, unchanged
    """
    delegate = capa.features.extractors.dotnetfile.extract_file_format
    for item in delegate(pe=pe):
        yield item


def extract_file_function_names(pe: dnfile.dnPE) -> Iterator[Tuple[FunctionName, int]]:
    """extract file function names by delegating to the shared dotnetfile extractor

    args:
      pe: parsed .NET PE file, forwarded by keyword

    yields:
      the items produced by capa.features.extractors.dotnetfile.extract_file_function_names, unchanged
    """
    features = capa.features.extractors.dotnetfile.extract_file_function_names(pe=pe)
    for item in features:
        yield item


def extract_file_strings(pe: dnfile.dnPE) -> Iterator[Tuple[String, int]]:
    """extract file strings by delegating to the shared dotnetfile extractor

    args:
      pe: parsed .NET PE file, forwarded by keyword

    yields:
      the items produced by capa.features.extractors.dotnetfile.extract_file_strings, unchanged
    """
    extractor = capa.features.extractors.dotnetfile.extract_file_strings
    for item in extractor(pe=pe):
        yield item

Expand All @@ -38,7 +42,7 @@ def extract_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, int]]:

# file-scope feature handlers defined in this module
# NOTE(review): presumably iterated in order by extract_features (body not visible here) - confirm
FILE_HANDLERS = (
    extract_file_import_names,
    extract_file_function_names,
    extract_file_strings,
    extract_file_format,
)
57 changes: 48 additions & 9 deletions capa/features/extractors/dnfile/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,18 +105,24 @@ def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]:
TypeName (index into String heap)
TypeNamespace (index into String heap)
"""
if not hasattr(pe.net.mdtables, "MemberRef") or pe.net.mdtables.MemberRef is None:
if not is_dotnet_table_valid(pe, "MemberRef"):
return

for (rid, row) in enumerate(pe.net.mdtables.MemberRef):
if not isinstance(row.Class.row, (dnfile.mdtable.TypeRefRow,)):
continue

token: int = calculate_dotnet_token_value(dnfile.enums.MetadataTables.MemberRef.value, rid + 1)
# like System.IO.File::OpenRead
imp: str = f"{row.Class.row.TypeNamespace}.{row.Class.row.TypeName}::{row.Name}"
# like File::OpenRead
name = f"{row.Class.row.TypeName}::{row.Name}"

yield token, imp
# ECMA II.22.38: TypeNamespace can be null or non-null
if row.Class.row.TypeNamespace:
# like System.IO.File::OpenRead
name = f"{row.Class.row.TypeNamespace}.{name}"

token: int = calculate_dotnet_token_value(pe.net.mdtables.MemberRef.number, rid + 1)

yield token, name


def get_dotnet_unmanaged_imports(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]:
Expand All @@ -130,7 +136,7 @@ def get_dotnet_unmanaged_imports(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]:
ImportName (index into the String heap)
ImportScope (index into the ModuleRef table)
"""
if not hasattr(pe.net.mdtables, "ImplMap") or pe.net.mdtables.ImplMap is None:
if not is_dotnet_table_valid(pe, "ImplMap"):
return

for row in pe.net.mdtables.ImplMap:
Expand All @@ -147,14 +153,14 @@ def get_dotnet_unmanaged_imports(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]:
dll = dll.split(".")[0]

# like kernel32.CreateFileA
imp: str = f"{dll}.{symbol}"
name: str = f"{dll}.{symbol}"

yield token, imp
yield token, name


def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[CilMethodBody]:
"""get managed methods from MethodDef table"""
if not hasattr(pe.net.mdtables, "MethodDef") or pe.net.mdtables.MethodDef is None:
if not is_dotnet_table_valid(pe, "MethodDef"):
return

for row in pe.net.mdtables.MethodDef:
Expand All @@ -167,3 +173,36 @@ def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[CilMethodBody]
continue

yield body


def is_dotnet_table_valid(pe: dnfile.dnPE, table_name: str) -> bool:
    """report whether the named .NET metadata table can be iterated

    args:
      pe: parsed .NET PE file
      table_name: attribute name of the table on pe.net.mdtables, like "TypeDef"

    returns:
      True only when pe.net.mdtables has an attribute called `table_name` whose value is truthy;
      False when the CLR data, the metadata tables, or the table itself is missing, None, or falsy
    """
    # a file without usable CLR metadata may leave `net` or `mdtables` unset;
    # treat that as "table not present" rather than raising AttributeError
    mdtables = getattr(getattr(pe, "net", None), "mdtables", None)
    return bool(getattr(mdtables, table_name, None))


def get_dotnet_managed_method_names(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]:
    """get managed method names from TypeDef table

    see https://www.ntcore.com/files/dotnetformat.htm

    02 - TypeDef Table
        Each row represents a class in the current assembly.
            TypeName (index into String heap)
            TypeNamespace (index into String heap)
            MethodList (index into MethodDef table; it marks the first of a contiguous run of Methods owned by this Type)

    yields (token, name) pairs; name is like System.IO.File::OpenRead, or File::OpenRead
    when the owning type has no namespace
    """
    if is_dotnet_table_valid(pe, "TypeDef"):
        for typedef in pe.net.mdtables.TypeDef:
            for method in typedef.MethodList:
                # ECMA II.22.37: TypeNamespace can be null or non-null
                namespace = typedef.TypeNamespace
                if namespace:
                    # like System.IO.File
                    owner = f"{namespace}.{typedef.TypeName}"
                else:
                    # like File
                    owner = f"{typedef.TypeName}"

                method_token = calculate_dotnet_token_value(method.table.number, method.row_index)

                yield method_token, f"{owner}::{method.row.Name}"
26 changes: 21 additions & 5 deletions capa/features/extractors/dnfile/insn.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from __future__ import annotations

from typing import TYPE_CHECKING, Dict, Tuple, Iterator, Optional
from itertools import chain

if TYPE_CHECKING:
from dncil.cil.instruction import Instruction
Expand All @@ -26,24 +25,41 @@
read_dotnet_user_string,
get_dotnet_managed_imports,
get_dotnet_unmanaged_imports,
get_dotnet_managed_method_names,
)


def get_imports(ctx: Dict) -> Dict:
    """return the token -> import name mapping for ctx["pe"], building it on first use

    the mapping is stored in `ctx` under "imports_cache", so later calls with the same
    context return the stored dict without walking the import tables again.

    args:
      ctx: extractor context; must hold the parsed file under "pe" when the cache is not populated yet

    returns:
      dict mapping token to name, covering managed and unmanaged imports
    """
    if "imports_cache" not in ctx:
        # build the cache in a single pass over each import source; unmanaged
        # entries are written last, so they win if a token appears in both
        ctx["imports_cache"] = {}

        for (token, name) in get_dotnet_managed_imports(ctx["pe"]):
            ctx["imports_cache"][token] = name
        for (token, name) in get_dotnet_unmanaged_imports(ctx["pe"]):
            ctx["imports_cache"][token] = name

    return ctx["imports_cache"]


def get_methods(ctx: Dict) -> Dict:
    """return the token -> managed method name mapping for ctx["pe"], building it on first use

    the mapping is stored in `ctx` under "methods_cache"; when that key already exists
    the stored dict is returned as-is.
    """
    if "methods_cache" in ctx:
        return ctx["methods_cache"]

    # register the (still empty) dict first, then fill it pair by pair
    cache = ctx["methods_cache"] = {}
    cache.update(get_dotnet_managed_method_names(ctx["pe"]))

    return cache


def extract_insn_api_features(f: CilMethodBody, bb: CilMethodBody, insn: Instruction) -> Iterator[Tuple[API, int]]:
"""parse instruction API features"""
if insn.opcode not in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli):
return

name: str = get_imports(f.ctx).get(insn.operand.value, "")
if not name:
name = get_methods(f.ctx).get(insn.operand.value, "")

if not name:
return

Expand Down
35 changes: 22 additions & 13 deletions capa/features/extractors/dotnetfile.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
import logging
from typing import Tuple, Iterator
from itertools import chain

import dnfile
import pefile

import capa.features.extractors.helpers
from capa.features.file import Import
from capa.features.file import Import, FunctionName
from capa.features.common import (
OS,
OS_ANY,
Expand All @@ -20,7 +19,12 @@
Feature,
)
from capa.features.extractors.base_extractor import FeatureExtractor
from capa.features.extractors.dnfile.helpers import get_dotnet_managed_imports, get_dotnet_unmanaged_imports
from capa.features.extractors.dnfile.helpers import (
get_dotnet_managed_imports,
calculate_dotnet_token_value,
get_dotnet_unmanaged_imports,
get_dotnet_managed_method_names,
)

logger = logging.getLogger(__name__)

Expand All @@ -30,15 +34,20 @@ def extract_file_format(**kwargs) -> Iterator[Tuple[Format, int]]:


def extract_file_import_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Import, int]]:
    """extract Import features for the managed and unmanaged imports of a .NET file

    args:
      pe: parsed .NET PE file
      **kwargs: accepted and ignored, so the handler can be called with extra keyword arguments

    yields:
      (Import, token) pairs; a managed import yields one feature named like
      System.IO.File::OpenRead, an unmanaged import yields one feature per symbol
      variant returned by capa.features.extractors.helpers.generate_symbols
    """
    # each import source is walked exactly once, so no feature is emitted twice
    for (token, name) in get_dotnet_managed_imports(pe):
        # like System.IO.File::OpenRead
        yield Import(name), token

    for (token, name) in get_dotnet_unmanaged_imports(pe):
        # like kernel32.CreateFileA; split on the last "." to separate dll from symbol
        dll, _, symbol = name.rpartition(".")
        for name_variant in capa.features.extractors.helpers.generate_symbols(dll, symbol):
            yield Import(name_variant), token


def extract_file_function_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[FunctionName, int]]:
    """extract a FunctionName feature for every managed method name found in the file

    args:
      pe: parsed .NET PE file
      **kwargs: accepted and ignored

    yields:
      (FunctionName, token) pairs, one per (token, name) produced by get_dotnet_managed_method_names
    """
    yield from (
        (FunctionName(method_name), method_token)
        for method_token, method_name in get_dotnet_managed_method_names(pe)
    )


def extract_file_os(**kwargs) -> Iterator[Tuple[OS, int]]:
Expand Down Expand Up @@ -68,8 +77,8 @@ def extract_file_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, int]]:

# file-scope feature handlers defined in this module
# NOTE(review): presumably iterated in order by extract_file_features (body not visible here) - confirm
FILE_HANDLERS = (
    extract_file_import_names,
    extract_file_function_names,
    extract_file_strings,
    extract_file_format,
)

Expand Down
3 changes: 3 additions & 0 deletions tests/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -671,6 +671,9 @@ def parametrize(params, values, **kwargs):
("mixed-mode-64", "file", Arch(ARCH_I386), False),
("b9f5b", "file", OS(OS_ANY), True),
("b9f5b", "file", Format(FORMAT_DOTNET), True),
("hello-world", "file", capa.features.file.FunctionName("HelloWorld::Main"), True),
("hello-world", "file", capa.features.file.FunctionName("HelloWorld::.ctor"), True),
("hello-world", "file", capa.features.file.FunctionName("HelloWorld::.cctor"), False),
("hello-world", "file", capa.features.common.String("Hello World!"), True),
("hello-world", "function=0x250", capa.features.common.String("Hello World!"), True),
("hello-world", "function=0x250, bb=0x250, insn=0x252", capa.features.common.String("Hello World!"), True),
Expand Down

0 comments on commit a9c9b3c

Please sign in to comment.