Skip to content

Commit

Permalink
Fix imphash issue with empty import names. (#1944)
Browse files Browse the repository at this point in the history
* Fix imphash issue with empty import names.

If an import has an empty name skip processing it. This is consistent with the
behavior of pefile (https://github.com/erocarrera/pefile/blob/593d094e35198dad92aaf040bef17eb800c8a373/pefile.py#L5871-L5872).

Add a test case which is just the tiny test file with the first import name set
to all NULL bytes. I tested that pefile calculates the imphash of it, which
matches what YARA now calculates too:

>>> import pefile
>>> pe = pefile.PE('/Users/wxs/src/yara/tests/data/tiny_empty_import_name')
>>> pe.get_imphash()
'0eff3a0eb037af8c1ef0bada984d6af5'
>>>

Fixes #1943

* Add test file forgot in last commit.

* Handle invalid import names.

If an imported function does not contain ONLY a-zA-Z0-9 and a small subset of
special characters it will now be ignored.  This aligns us better with pefile,
which checks for valid import names and skips them if they are invalid.

I've also updated the test file to check that these special characters are
handled properly.

* Fix test after alignment with pefile.

Turns out that the tiny-idata-5200 file is corrupted to the point that pefile
doesn't parse it. For example, it finds no imports to hash:

```
>>> pe = pefile.PE('tests/data/tiny-idata-5200')
>>> pe.get_imphash()
''
>>>
```

We were parsing imports from this file before these changes that were incorrect,
so fix the tests to reflect the fact that we parse no imports from this file
anymore.

As part of this I've split the checks for number of parsed imports and
successfully parsed imports into two different counters, which now means we are
accurately reflecting when we are able to parse the import table but not the
descriptors in it while still making sure we don't parse too many as we have
seen before.

* Move declarations to align better.

Move these declaractions so they are with the rest of the constants. This makes
the output of -D easier to read.

* Fix memory leak when handling corrupt imports.

If we have an invalid import name we need to free the name and continue to the
next thunk and function. While fixing this I noticed that if we fail to alloc an
IMPORT_FUNCTION* we would end up looping endlessly because we were never
incrementing the thunk pointer or function index. Fix it by ALWAYS incrementing
those at the end of the loop and conditionally populating the newly allocated
node.
  • Loading branch information
wxsBSD authored Aug 18, 2023
1 parent bc0f052 commit e94ec7b
Show file tree
Hide file tree
Showing 3 changed files with 106 additions and 108 deletions.
135 changes: 93 additions & 42 deletions libyara/modules/pe/pe.c
Original file line number Diff line number Diff line change
Expand Up @@ -326,8 +326,8 @@ static void pe_parse_debug_directory(PE* pe)
{
int64_t pcv_hdr_offset = 0;

debug_dir =
(PIMAGE_DEBUG_DIRECTORY) (pe->data + debug_dir_offset + i * sizeof(IMAGE_DEBUG_DIRECTORY));
debug_dir = (PIMAGE_DEBUG_DIRECTORY) (pe->data + debug_dir_offset +
i * sizeof(IMAGE_DEBUG_DIRECTORY));

if (!struct_fits_in_pe(pe, debug_dir, IMAGE_DEBUG_DIRECTORY))
break;
Expand Down Expand Up @@ -409,7 +409,8 @@ static const PIMAGE_RESOURCE_DIR_STRING_U parse_resource_name(
if (yr_le32toh(entry->Name) & 0x80000000)
{
const PIMAGE_RESOURCE_DIR_STRING_U pNameString =
(PIMAGE_RESOURCE_DIR_STRING_U) (rsrc_data + (yr_le32toh(entry->Name) & 0x7FFFFFFF));
(PIMAGE_RESOURCE_DIR_STRING_U) (rsrc_data +
(yr_le32toh(entry->Name) & 0x7FFFFFFF));

// A resource directory string is 2 bytes for the length and then a variable
// length Unicode string. Make sure we have at least 2 bytes.
Expand All @@ -419,7 +420,9 @@ static const PIMAGE_RESOURCE_DIR_STRING_U parse_resource_name(

// Move past the length and make sure we have enough bytes for the string.
if (!fits_in_pe(
pe, pNameString, sizeof(uint16_t) + yr_le16toh(pNameString->Length) * 2))
pe,
pNameString,
sizeof(uint16_t) + yr_le16toh(pNameString->Length) * 2))
return NULL;

return pNameString;
Expand Down Expand Up @@ -806,6 +809,25 @@ static int pe_collect_resources(
return RESOURCE_CALLBACK_CONTINUE;
}

// Function names should have only lowercase, uppercase, digits and a small
// subset of special characters. This is to match behavior of pefile. See
// https://github.com/erocarrera/pefile/blob/593d094e35198dad92aaf040bef17eb800c8a373/pefile.py#L2326-L2348
static int valid_function_name(char* name)
{
if (!strcmp(name, ""))
return 0;

size_t i = 0;
for (char c = name[i]; c != '\x00'; c = name[++i])
{
if (!(c >= 'a' && c <= 'z') && !(c >= 'A' && c <= 'Z') &&
!(c >= '0' && c <= '9') && c != '.' && c != '_' && c != '?' &&
c != '@' && c != '$' && c != '(' && c != ')' && c != '<' && c != '>')
return 0;
}
return 1;
}

static IMPORT_FUNCTION* pe_parse_import_descriptor(
PE* pe,
PIMAGE_IMPORT_DESCRIPTOR import_descriptor,
Expand All @@ -814,6 +836,11 @@ static IMPORT_FUNCTION* pe_parse_import_descriptor(
{
IMPORT_FUNCTION* head = NULL;
IMPORT_FUNCTION* tail = NULL;
// This is tracked separately from num_function_imports because that is the
// number of successfully parsed imports, while this is the number of imports
// attempted to be parsed. This allows us to stop parsing on too many imports
// while still accurately recording the number of successfully parsed imports.
int parsed_imports = 0;

int64_t offset = pe_rva_to_offset(
pe, yr_le32toh(import_descriptor->OriginalFirstThunk));
Expand All @@ -834,22 +861,24 @@ static IMPORT_FUNCTION* pe_parse_import_descriptor(

while (struct_fits_in_pe(pe, thunks64, IMAGE_THUNK_DATA64) &&
yr_le64toh(thunks64->u1.Ordinal) != 0 &&
*num_function_imports < MAX_PE_IMPORTS)
parsed_imports < MAX_PE_IMPORTS)
{
char* name = NULL;
uint16_t ordinal = 0;
uint8_t has_ordinal = 0;
uint64_t rva_address = 0;

parsed_imports++;

if (!(yr_le64toh(thunks64->u1.Ordinal) & IMAGE_ORDINAL_FLAG64))
{
// If imported by name
offset = pe_rva_to_offset(pe, yr_le64toh(thunks64->u1.Function));

if (offset >= 0)
{
PIMAGE_IMPORT_BY_NAME import =
(PIMAGE_IMPORT_BY_NAME) (pe->data + offset);
PIMAGE_IMPORT_BY_NAME import = (PIMAGE_IMPORT_BY_NAME) (pe->data +
offset);

if (struct_fits_in_pe(pe, import, IMAGE_IMPORT_BY_NAME))
{
Expand All @@ -871,6 +900,14 @@ static IMPORT_FUNCTION* pe_parse_import_descriptor(
rva_address = yr_le32toh(import_descriptor->FirstThunk) +
(sizeof(uint64_t) * func_idx);

if (name != NULL && !valid_function_name(name))
{
yr_free(name);
thunks64++;
func_idx++;
continue;
}

if (name != NULL || has_ordinal == 1)
{
IMPORT_FUNCTION* imported_func = (IMPORT_FUNCTION*) yr_calloc(
Expand All @@ -879,25 +916,26 @@ static IMPORT_FUNCTION* pe_parse_import_descriptor(
if (imported_func == NULL)
{
yr_free(name);
continue;
}
else
{
imported_func->name = name;
imported_func->ordinal = ordinal;
imported_func->has_ordinal = has_ordinal;
imported_func->rva = rva_address;
imported_func->next = NULL;

imported_func->name = name;
imported_func->ordinal = ordinal;
imported_func->has_ordinal = has_ordinal;
imported_func->rva = rva_address;
imported_func->next = NULL;

if (head == NULL)
head = imported_func;
if (head == NULL)
head = imported_func;

if (tail != NULL)
tail->next = imported_func;
if (tail != NULL)
tail->next = imported_func;

tail = imported_func;
tail = imported_func;
(*num_function_imports)++;
}
}

(*num_function_imports)++;
thunks64++;
func_idx++;
}
Expand All @@ -916,15 +954,17 @@ static IMPORT_FUNCTION* pe_parse_import_descriptor(
uint8_t has_ordinal = 0;
uint32_t rva_address = 0;

parsed_imports++;

if (!(yr_le32toh(thunks32->u1.Ordinal) & IMAGE_ORDINAL_FLAG32))
{
// If imported by name
offset = pe_rva_to_offset(pe, yr_le32toh(thunks32->u1.Function));

if (offset >= 0)
{
PIMAGE_IMPORT_BY_NAME import =
(PIMAGE_IMPORT_BY_NAME) (pe->data + offset);
PIMAGE_IMPORT_BY_NAME import = (PIMAGE_IMPORT_BY_NAME) (pe->data +
offset);

if (struct_fits_in_pe(pe, import, IMAGE_IMPORT_BY_NAME))
{
Expand All @@ -946,6 +986,14 @@ static IMPORT_FUNCTION* pe_parse_import_descriptor(
rva_address = yr_le32toh(import_descriptor->FirstThunk) +
(sizeof(uint32_t) * func_idx);

if (name != NULL && !valid_function_name(name))
{
yr_free(name);
thunks32++;
func_idx++;
continue;
}

if (name != NULL || has_ordinal == 1)
{
IMPORT_FUNCTION* imported_func = (IMPORT_FUNCTION*) yr_calloc(
Expand All @@ -954,25 +1002,26 @@ static IMPORT_FUNCTION* pe_parse_import_descriptor(
if (imported_func == NULL)
{
yr_free(name);
continue;
}
else
{
imported_func->name = name;
imported_func->ordinal = ordinal;
imported_func->has_ordinal = has_ordinal;
imported_func->rva = rva_address;
imported_func->next = NULL;

imported_func->name = name;
imported_func->ordinal = ordinal;
imported_func->has_ordinal = has_ordinal;
imported_func->rva = rva_address;
imported_func->next = NULL;

if (head == NULL)
head = imported_func;
if (head == NULL)
head = imported_func;

if (tail != NULL)
tail->next = imported_func;
if (tail != NULL)
tail->next = imported_func;

tail = imported_func;
tail = imported_func;
(*num_function_imports)++;
}
}

(*num_function_imports)++;
thunks32++;
func_idx++;
}
Expand Down Expand Up @@ -1072,6 +1121,7 @@ void pe_set_imports(
static IMPORTED_DLL* pe_parse_imports(PE* pe)
{
int64_t offset;
int parsed_imports = 0; // Number of parsed DLLs
int num_imports = 0; // Number of imported DLLs
int num_function_imports = 0; // Total number of functions imported

Expand Down Expand Up @@ -1101,8 +1151,10 @@ static IMPORTED_DLL* pe_parse_imports(PE* pe)
imports = (PIMAGE_IMPORT_DESCRIPTOR) (pe->data + offset);

while (struct_fits_in_pe(pe, imports, IMAGE_IMPORT_DESCRIPTOR) &&
yr_le32toh(imports->Name) != 0 && num_imports < MAX_PE_IMPORTS)
yr_le32toh(imports->Name) != 0 && parsed_imports < MAX_PE_IMPORTS)
{
parsed_imports++;

int64_t offset = pe_rva_to_offset(pe, yr_le32toh(imports->Name));

if (offset >= 0)
Expand All @@ -1127,7 +1179,6 @@ static IMPORTED_DLL* pe_parse_imports(PE* pe)
if (functions != NULL)
{
imported_dll->name = yr_strdup(dll_name);
;
imported_dll->functions = functions;
imported_dll->next = NULL;

Expand All @@ -1138,6 +1189,7 @@ static IMPORTED_DLL* pe_parse_imports(PE* pe)
tail->next = imported_dll;

tail = imported_dll;
num_imports++;
}
else
{
Expand All @@ -1146,7 +1198,6 @@ static IMPORTED_DLL* pe_parse_imports(PE* pe)
}
}

num_imports++;
imports++;
}

Expand Down Expand Up @@ -3603,6 +3654,10 @@ begin_declarations
declare_integer("IMAGE_DEBUG_TYPE_MPX");
declare_integer("IMAGE_DEBUG_TYPE_REPRO");

declare_integer("IMPORT_DELAYED");
declare_integer("IMPORT_STANDARD");
declare_integer("IMPORT_ANY");

declare_integer("is_pe");
declare_integer("machine");
declare_integer("number_of_sections");
Expand Down Expand Up @@ -3711,10 +3766,6 @@ begin_declarations
declare_function("imphash", "", "s", imphash);
#endif

declare_integer("IMPORT_DELAYED");
declare_integer("IMPORT_STANDARD");
declare_integer("IMPORT_ANY");

declare_function("section_index", "s", "i", section_index_name);
declare_function("section_index", "i", "i", section_index_addr);
declare_function("exports", "s", "i", exports);
Expand Down
Binary file added tests/data/tiny_empty_import_name
Binary file not shown.
Loading

0 comments on commit e94ec7b

Please sign in to comment.