diff --git a/libyara/modules/pe/pe.c b/libyara/modules/pe/pe.c index df67549cc9..f465fb0318 100644 --- a/libyara/modules/pe/pe.c +++ b/libyara/modules/pe/pe.c @@ -326,8 +326,8 @@ static void pe_parse_debug_directory(PE* pe) { int64_t pcv_hdr_offset = 0; - debug_dir = - (PIMAGE_DEBUG_DIRECTORY) (pe->data + debug_dir_offset + i * sizeof(IMAGE_DEBUG_DIRECTORY)); + debug_dir = (PIMAGE_DEBUG_DIRECTORY) (pe->data + debug_dir_offset + + i * sizeof(IMAGE_DEBUG_DIRECTORY)); if (!struct_fits_in_pe(pe, debug_dir, IMAGE_DEBUG_DIRECTORY)) break; @@ -409,7 +409,8 @@ static const PIMAGE_RESOURCE_DIR_STRING_U parse_resource_name( if (yr_le32toh(entry->Name) & 0x80000000) { const PIMAGE_RESOURCE_DIR_STRING_U pNameString = - (PIMAGE_RESOURCE_DIR_STRING_U) (rsrc_data + (yr_le32toh(entry->Name) & 0x7FFFFFFF)); + (PIMAGE_RESOURCE_DIR_STRING_U) (rsrc_data + + (yr_le32toh(entry->Name) & 0x7FFFFFFF)); // A resource directory string is 2 bytes for the length and then a variable // length Unicode string. Make sure we have at least 2 bytes. @@ -419,7 +420,9 @@ static const PIMAGE_RESOURCE_DIR_STRING_U parse_resource_name( // Move past the length and make sure we have enough bytes for the string. if (!fits_in_pe( - pe, pNameString, sizeof(uint16_t) + yr_le16toh(pNameString->Length) * 2)) + pe, + pNameString, + sizeof(uint16_t) + yr_le16toh(pNameString->Length) * 2)) return NULL; return pNameString; @@ -806,6 +809,25 @@ static int pe_collect_resources( return RESOURCE_CALLBACK_CONTINUE; } +// Function names should have only lowercase, uppercase, digits and a small +// subset of special characters. This is to match behavior of pefile. See +// https://github.com/erocarrera/pefile/blob/593d094e35198dad92aaf040bef17eb800c8a373/pefile.py#L2326-L2348 +static int valid_function_name(char* name) +{ + if (!strcmp(name, "")) + return 0; + + size_t i = 0; + for (char c = name[i]; c != '\x00'; c = name[++i]) + { + if (!(c >= 'a' && c <= 'z') && !(c >= 'A' && c <= 'Z') && + !(c >= '0' && c <= '9') && c != '.' && c != '_' && c != '?' && + c != '@' && c != '$' && c != '(' && c != ')' && c != '<' && c != '>') + return 0; + } + return 1; +} + static IMPORT_FUNCTION* pe_parse_import_descriptor( PE* pe, PIMAGE_IMPORT_DESCRIPTOR import_descriptor, @@ -814,6 +836,11 @@ static IMPORT_FUNCTION* pe_parse_import_descriptor( { IMPORT_FUNCTION* head = NULL; IMPORT_FUNCTION* tail = NULL; + // This is tracked separately from num_function_imports because that is the + // number of successfully parsed imports, while this is the number of imports + // attempted to be parsed. This allows us to stop parsing on too many imports + // while still accurately recording the number of successfully parsed imports. + int parsed_imports = 0; int64_t offset = pe_rva_to_offset( pe, yr_le32toh(import_descriptor->OriginalFirstThunk)); @@ -834,13 +861,15 @@ static IMPORT_FUNCTION* pe_parse_import_descriptor( while (struct_fits_in_pe(pe, thunks64, IMAGE_THUNK_DATA64) && yr_le64toh(thunks64->u1.Ordinal) != 0 && - *num_function_imports < MAX_PE_IMPORTS) + parsed_imports < MAX_PE_IMPORTS) { char* name = NULL; uint16_t ordinal = 0; uint8_t has_ordinal = 0; uint64_t rva_address = 0; + parsed_imports++; + if (!(yr_le64toh(thunks64->u1.Ordinal) & IMAGE_ORDINAL_FLAG64)) { // If imported by name @@ -848,8 +877,8 @@ static IMPORT_FUNCTION* pe_parse_import_descriptor( if (offset >= 0) { - PIMAGE_IMPORT_BY_NAME import = - (PIMAGE_IMPORT_BY_NAME) (pe->data + offset); + PIMAGE_IMPORT_BY_NAME import = (PIMAGE_IMPORT_BY_NAME) (pe->data + + offset); if (struct_fits_in_pe(pe, import, IMAGE_IMPORT_BY_NAME)) { @@ -871,6 +900,14 @@ static IMPORT_FUNCTION* pe_parse_import_descriptor( rva_address = yr_le32toh(import_descriptor->FirstThunk) + (sizeof(uint64_t) * func_idx); + if (name != NULL && !valid_function_name(name)) + { + yr_free(name); + thunks64++; + func_idx++; + continue; + } + if (name != NULL || has_ordinal == 1) { IMPORT_FUNCTION* imported_func = (IMPORT_FUNCTION*) yr_calloc( @@ -879,25 +916,26 @@ static IMPORT_FUNCTION* pe_parse_import_descriptor( if (imported_func == NULL) { yr_free(name); - continue; } + else + { + imported_func->name = name; + imported_func->ordinal = ordinal; + imported_func->has_ordinal = has_ordinal; + imported_func->rva = rva_address; + imported_func->next = NULL; - imported_func->name = name; - imported_func->ordinal = ordinal; - imported_func->has_ordinal = has_ordinal; - imported_func->rva = rva_address; - imported_func->next = NULL; - - if (head == NULL) - head = imported_func; + if (head == NULL) + head = imported_func; - if (tail != NULL) - tail->next = imported_func; + if (tail != NULL) + tail->next = imported_func; - tail = imported_func; + tail = imported_func; + (*num_function_imports)++; + } } - (*num_function_imports)++; thunks64++; func_idx++; } @@ -916,6 +954,8 @@ static IMPORT_FUNCTION* pe_parse_import_descriptor( uint8_t has_ordinal = 0; uint32_t rva_address = 0; + parsed_imports++; + if (!(yr_le32toh(thunks32->u1.Ordinal) & IMAGE_ORDINAL_FLAG32)) { // If imported by name @@ -923,8 +963,8 @@ static IMPORT_FUNCTION* pe_parse_import_descriptor( if (offset >= 0) { - PIMAGE_IMPORT_BY_NAME import = - (PIMAGE_IMPORT_BY_NAME) (pe->data + offset); + PIMAGE_IMPORT_BY_NAME import = (PIMAGE_IMPORT_BY_NAME) (pe->data + + offset); if (struct_fits_in_pe(pe, import, IMAGE_IMPORT_BY_NAME)) { @@ -946,6 +986,14 @@ static IMPORT_FUNCTION* pe_parse_import_descriptor( rva_address = yr_le32toh(import_descriptor->FirstThunk) + (sizeof(uint32_t) * func_idx); + if (name != NULL && !valid_function_name(name)) + { + yr_free(name); + thunks32++; + func_idx++; + continue; + } + if (name != NULL || has_ordinal == 1) { IMPORT_FUNCTION* imported_func = (IMPORT_FUNCTION*) yr_calloc( @@ -954,25 +1002,26 @@ static IMPORT_FUNCTION* pe_parse_import_descriptor( if (imported_func == NULL) { yr_free(name); - continue; } + else + { + imported_func->name = name; + imported_func->ordinal = ordinal; + imported_func->has_ordinal = has_ordinal; + imported_func->rva = rva_address; + imported_func->next = NULL; - imported_func->name = name; - imported_func->ordinal = ordinal; - imported_func->has_ordinal = has_ordinal; - imported_func->rva = rva_address; - imported_func->next = NULL; - - if (head == NULL) - head = imported_func; + if (head == NULL) + head = imported_func; - if (tail != NULL) - tail->next = imported_func; + if (tail != NULL) + tail->next = imported_func; - tail = imported_func; + tail = imported_func; + (*num_function_imports)++; + } } - (*num_function_imports)++; thunks32++; func_idx++; } @@ -1072,6 +1121,7 @@ void pe_set_imports( static IMPORTED_DLL* pe_parse_imports(PE* pe) { int64_t offset; + int parsed_imports = 0; // Number of parsed DLLs int num_imports = 0; // Number of imported DLLs int num_function_imports = 0; // Total number of functions imported @@ -1101,8 +1151,10 @@ static IMPORTED_DLL* pe_parse_imports(PE* pe) imports = (PIMAGE_IMPORT_DESCRIPTOR) (pe->data + offset); while (struct_fits_in_pe(pe, imports, IMAGE_IMPORT_DESCRIPTOR) && - yr_le32toh(imports->Name) != 0 && num_imports < MAX_PE_IMPORTS) + yr_le32toh(imports->Name) != 0 && parsed_imports < MAX_PE_IMPORTS) { + parsed_imports++; + int64_t offset = pe_rva_to_offset(pe, yr_le32toh(imports->Name)); if (offset >= 0) @@ -1127,7 +1179,6 @@ static IMPORTED_DLL* pe_parse_imports(PE* pe) if (functions != NULL) { imported_dll->name = yr_strdup(dll_name); - ; imported_dll->functions = functions; imported_dll->next = NULL; @@ -1138,6 +1189,7 @@ static IMPORTED_DLL* pe_parse_imports(PE* pe) tail->next = imported_dll; tail = imported_dll; + num_imports++; } else { @@ -1146,7 +1198,6 @@ static IMPORTED_DLL* pe_parse_imports(PE* pe) } } - num_imports++; imports++; } @@ -3603,6 +3654,10 @@ begin_declarations declare_integer("IMAGE_DEBUG_TYPE_MPX"); declare_integer("IMAGE_DEBUG_TYPE_REPRO"); + declare_integer("IMPORT_DELAYED"); + declare_integer("IMPORT_STANDARD"); + declare_integer("IMPORT_ANY"); + declare_integer("is_pe"); declare_integer("machine"); declare_integer("number_of_sections"); @@ -3711,10 +3766,6 @@ begin_declarations declare_function("imphash", "", "s", imphash); #endif - declare_integer("IMPORT_DELAYED"); - declare_integer("IMPORT_STANDARD"); - declare_integer("IMPORT_ANY"); - declare_function("section_index", "s", "i", section_index_name); declare_function("section_index", "i", "i", section_index_addr); declare_function("exports", "s", "i", exports); diff --git a/tests/data/tiny_empty_import_name b/tests/data/tiny_empty_import_name new file mode 100644 index 0000000000..e58620b3ef Binary files /dev/null and b/tests/data/tiny_empty_import_name differ diff --git a/tests/test-pe.c b/tests/test-pe.c index db6ca02ab3..9d77719396 100644 --- a/tests/test-pe.c +++ b/tests/test-pe.c @@ -48,11 +48,12 @@ int main(int argc, char** argv) }", "tests/data/tiny-idata-51ff"); - assert_false_rule_file( + // The imports are so corrupted that we can not parse any of them. + assert_true_rule_file( "import \"pe\" \ rule test { \ condition: \ - pe.imports(\"KERNEL32.dll\", \"DeleteCriticalSection\") \ + pe.number_of_imports == 0 and pe.number_of_imported_functions == 0 \ }", "tests/data/tiny-idata-5200"); @@ -72,22 +73,6 @@ int main(int argc, char** argv) }", "tests/data/tiny"); - assert_true_rule_file( - "import \"pe\" \ - rule test { \ - condition: \ - pe.imports(/.*/, /.*/) \ - }", - "tests/data/tiny-idata-5200"); - - assert_false_rule_file( - "import \"pe\" \ - rule test { \ - condition: \ - pe.imports(/.*/, /.*CriticalSection/) \ - }", - "tests/data/tiny-idata-5200"); - /////////////////////////////// assert_true_rule_file( @@ -106,14 +91,6 @@ int main(int argc, char** argv) }", "tests/data/tiny-idata-51ff"); - assert_false_rule_file( - "import \"pe\" \ - rule test { \ - condition: \ - pe.imports(pe.IMPORT_STANDARD, \"KERNEL32.dll\", \"DeleteCriticalSection\") \ - }", - "tests/data/tiny-idata-5200"); - assert_true_rule_file( "import \"pe\" \ rule test { \ @@ -130,22 +107,6 @@ int main(int argc, char** argv) }", "tests/data/tiny"); - assert_true_rule_file( - "import \"pe\" \ - rule test { \ - condition: \ - pe.imports(pe.IMPORT_STANDARD, /.*/, /.*/) \ - }", - "tests/data/tiny-idata-5200"); - - assert_false_rule_file( - "import \"pe\" \ - rule test { \ - condition: \ - pe.imports(pe.IMPORT_STANDARD, /.*/, /.*CriticalSection/) \ - }", - "tests/data/tiny-idata-5200"); - assert_true_rule_file( "import \"pe\" \ rule test { \ @@ -221,14 +182,6 @@ int main(int argc, char** argv) }", "tests/data/tiny-idata-51ff"); - assert_false_rule_file( - "import \"pe\" \ - rule test { \ - condition: \ - pe.imports(pe.IMPORT_ANY, \"KERNEL32.dll\", \"DeleteCriticalSection\") \ - }", - "tests/data/tiny-idata-5200"); - assert_true_rule_file( "import \"pe\" \ rule test { \ @@ -245,22 +198,6 @@ int main(int argc, char** argv) }", "tests/data/tiny"); - assert_true_rule_file( - "import \"pe\" \ - rule test { \ - condition: \ - pe.imports(pe.IMPORT_ANY, /.*/, /.*/) \ - }", - "tests/data/tiny-idata-5200"); - - assert_false_rule_file( - "import \"pe\" \ - rule test { \ - condition: \ - pe.imports(pe.IMPORT_ANY, /.*/, /.*CriticalSection/) \ - }", - "tests/data/tiny-idata-5200"); - assert_true_rule( "import \"pe\" \ rule test { \ @@ -329,6 +266,16 @@ int main(int argc, char** argv) }", "tests/data/tiny"); + // Make sure imports with no ordinal and an empty name are skipped. This is + // consistent with the behavior of pefile. + assert_true_rule_file( + "import \"pe\" \ + rule test { \ + condition: \ + pe.imphash() == \"b441b7fd09648ae6a06cea0e090128d6\" \ + }", + "tests/data/tiny_empty_import_name"); + #endif #if defined(HAVE_LIBCRYPTO)