diff --git a/include/retdec/fileformat/types/dotnet_headers/blob_stream.h b/include/retdec/fileformat/types/dotnet_headers/blob_stream.h index c4bfd8913..5bf2d43c9 100644 --- a/include/retdec/fileformat/types/dotnet_headers/blob_stream.h +++ b/include/retdec/fileformat/types/dotnet_headers/blob_stream.h @@ -7,6 +7,7 @@ #ifndef RETDEC_FILEFORMAT_TYPES_DOTNET_HEADERS_BLOB_STREAM_H #define RETDEC_FILEFORMAT_TYPES_DOTNET_HEADERS_BLOB_STREAM_H +#include #include #include "retdec/fileformat/types/dotnet_headers/stream.h" @@ -17,19 +18,11 @@ namespace fileformat { class BlobStream : public Stream { private: - std::unordered_map> elements; + std::vector data; public: - BlobStream(std::uint64_t streamOffset, std::uint64_t streamSize); + BlobStream(std::vector data, std::uint64_t streamOffset, std::uint64_t streamSize); - /// @name Getters - /// @{ std::vector getElement(std::size_t offset) const; - /// @} - - /// @name Element methods - /// @{ - void addElement(std::size_t offset, const std::vector& data); - /// @} }; } // namespace fileformat diff --git a/include/retdec/fileformat/types/dotnet_headers/metadata_tables.h b/include/retdec/fileformat/types/dotnet_headers/metadata_tables.h index e4c541bcf..340d7f581 100644 --- a/include/retdec/fileformat/types/dotnet_headers/metadata_tables.h +++ b/include/retdec/fileformat/types/dotnet_headers/metadata_tables.h @@ -651,6 +651,9 @@ struct TypeDef : public BaseRecord bool isNestedPublic() const { return (flags & TypeVisibilityMask) == TypeNestedPublic; } bool isNestedPrivate() const { return (flags & TypeVisibilityMask) == TypeNestedPrivate; } bool isNestedProtected() const { return (flags & TypeVisibilityMask) == TypeNestedFamily; } + bool isNestedInternal() const { return (flags & TypeVisibilityMask) == TypeNestedAssembly; } + bool isNestedFamOrAssem() const { return (flags & TypeVisibilityMask) == TypeNestedFamORAssem; } + bool isNestedFamAndAssem() const { return (flags & TypeVisibilityMask) == TypeNestedFamANDAssem; } bool 
isClass() const { return (flags & TypeClassSemanticsMask) == TypeClass; } bool isInterface() const { return (flags & TypeClassSemanticsMask) == TypeInterface; } bool isAbstract() const { return flags & TypeClassAbstract; } @@ -688,6 +691,9 @@ struct Field : public BaseRecord bool isPublic() const { return (flags & FieldAccessMask) == FieldPublic; } bool isProtected() const { return (flags & FieldAccessMask) == FieldFamily; } bool isPrivate() const { return (flags & FieldAccessMask) == FieldPrivate; } + bool isInternal() const { return (flags & FieldAccessMask) == FieldAssembly; } + bool isFamOrAssem() const { return (flags & FieldAccessMask) == FieldFamORAssem; } + bool isFamAndAssem() const { return (flags & FieldAccessMask) == FieldFamANDAssem; } bool isStatic() const { return flags & FieldStatic; } virtual void load(const FileFormat* file, const MetadataStream* stream, std::uint64_t& address) override @@ -720,6 +726,10 @@ struct MethodDef : public BaseRecord bool isPublic() const { return (flags & MethodMemberAccessMask) == MethodPublic; } bool isPrivate() const { return (flags & MethodMemberAccessMask) == MethodPrivate; } bool isProtected() const { return (flags & MethodMemberAccessMask) == MethodFamily; } + bool isInternal() const { return (flags & MethodMemberAccessMask) == MethodAssem; } + bool isFamOrAssem() const { return (flags & MethodMemberAccessMask) == MethodFamORAssem; } + bool isFamAndAssem() const { return (flags & MethodMemberAccessMask) == MethodFamANDAssem; } + bool isStatic() const { return flags & MethodStatic; } bool isVirtual() const { return flags & MethodVirtual; } bool isFinal() const { return flags & MethodFinal; } diff --git a/include/retdec/fileformat/types/dotnet_types/dotnet_class.h b/include/retdec/fileformat/types/dotnet_types/dotnet_class.h index 8c259fb08..5c595428c 100644 --- a/include/retdec/fileformat/types/dotnet_types/dotnet_class.h +++ b/include/retdec/fileformat/types/dotnet_types/dotnet_class.h @@ -94,6 +94,7 @@ class 
DotnetClass : public DotnetType bool isInterface() const; bool isAbstract() const; bool isSealed() const; + bool isNested() const; /// @} /// @name Additions diff --git a/include/retdec/fileformat/types/dotnet_types/dotnet_type.h b/include/retdec/fileformat/types/dotnet_types/dotnet_type.h index df10e7e54..8000d9fb8 100644 --- a/include/retdec/fileformat/types/dotnet_types/dotnet_type.h +++ b/include/retdec/fileformat/types/dotnet_types/dotnet_type.h @@ -19,8 +19,11 @@ namespace fileformat { enum class DotnetTypeVisibility { Public, + Internal, + Private, Protected, - Private + ProtectedInternal, + PrivateProtected }; /** @@ -56,6 +59,9 @@ class DotnetType bool isPublic() const { return visibility == DotnetTypeVisibility::Public; } bool isProtected() const { return visibility == DotnetTypeVisibility::Protected; } bool isPrivate() const { return visibility == DotnetTypeVisibility::Private; } + bool isInternal() const { return visibility == DotnetTypeVisibility::Internal; } + bool isProtectedInternal() const { return visibility == DotnetTypeVisibility::ProtectedInternal; } + bool isPrivateProtected() const { return visibility == DotnetTypeVisibility::PrivateProtected; } /// @} }; diff --git a/include/retdec/fileformat/types/dotnet_types/dotnet_type_reconstructor.h b/include/retdec/fileformat/types/dotnet_types/dotnet_type_reconstructor.h index e716a384f..bce6ea255 100644 --- a/include/retdec/fileformat/types/dotnet_types/dotnet_type_reconstructor.h +++ b/include/retdec/fileformat/types/dotnet_types/dotnet_type_reconstructor.h @@ -50,7 +50,7 @@ class DotnetTypeReconstructor std::unique_ptr createField(const Field* field, const DotnetClass* ownerClass); std::unique_ptr createProperty(const Property* property, const DotnetClass* ownerClass); std::unique_ptr createMethod(const MethodDef* methodDef, const DotnetClass* ownerClass); - std::unique_ptr createMethodParameter(const Param* param, const DotnetClass* ownerClass, const DotnetMethod* ownerMethod, std::vector& 
signature); + std::unique_ptr createMethodParameter(std::size_t paramIdx, std::size_t startIdx, const DotnetClass* ownerClass, const DotnetMethod* ownerMethod, std::vector& signature); template std::unique_ptr createDataTypeFollowedByReference(std::vector& data); template std::unique_ptr createDataTypeFollowedByType(std::vector& data, const DotnetClass* ownerClass, const DotnetMethod* ownerMethod); diff --git a/src/fileformat/file_format/pe/pe_format.cpp b/src/fileformat/file_format/pe/pe_format.cpp index b3de60710..aea105b1d 100644 --- a/src/fileformat/file_format/pe/pe_format.cpp +++ b/src/fileformat/file_format/pe/pe_format.cpp @@ -1924,15 +1924,15 @@ void PeFormat::loadDotnetHeaders() return; } - if (streamName == "#~" || streamName == "#-") + if ((streamName == "#~" || streamName == "#-") && !metadataStream) parseMetadataStream(metadataHeaderAddress, streamOffset, streamSize); - else if (streamName == "#Blob") + else if (streamName == "#Blob" && !blobStream) parseBlobStream(metadataHeaderAddress, streamOffset, streamSize); - else if (streamName == "#GUID") + else if (streamName == "#GUID" && !guidStream) parseGuidStream(metadataHeaderAddress, streamOffset, streamSize); - else if (streamName == "#Strings") + else if (streamName == "#Strings" && !stringStream) parseStringStream(metadataHeaderAddress, streamOffset, streamSize); - else if (streamName == "#US") + else if (streamName == "#US" && !userStringStream) parseUserStringStream(metadataHeaderAddress, streamOffset, streamSize); // Round-up to the nearest higher multiple of 4 @@ -2060,6 +2060,13 @@ void PeFormat::parseMetadataStream(std::uint64_t baseAddress, std::uint64_t offs currentAddress += 4; } } + // The ExtraData flag means there are 4 extra bytes at the end of the Rows array that contain the row sizes. + // This is not described in ECMA-335, but it occurs in real samples, and the ILSpy source + // understands it and decompiles such files correctly, sample: 
5b5817fe2d4f0989501802b0e2bb4451583ff27fd0723f40bb7f8b0417dd7c58 + if (heapOffsetSizes & 0x40) + { + currentAddress += 4; + } for (std::size_t i = 0; i < 64; ++i) { @@ -2214,55 +2221,11 @@ void PeFormat::parseMetadataStream(std::uint64_t baseAddress, std::uint64_t offs */ void PeFormat::parseBlobStream(std::uint64_t baseAddress, std::uint64_t offset, std::uint64_t size) { - blobStream = std::make_unique(offset, size); + std::vector data; auto address = baseAddress + offset; + getXBytes(address, size, data); + blobStream = std::make_unique(std::move(data), offset, size); - std::vector elementData; - std::uint64_t length, lengthSize; - - std::size_t inStreamOffset = 0; - while (inStreamOffset < size) - { - // First std::uint8_t is length of next element in the blob - lengthSize = 1; - if (!get1Byte(address + inStreamOffset, length)) - { - return; - } - - // 2-std::uint8_t length encoding if the length is 10xxxxxx - if ((length & 0xC0) == 0x80) - { - if (!get2Byte(address + inStreamOffset, length, Endianness::BIG)) - { - return; - } - - length &= ~0xC000; - lengthSize = 2; - } - // 4-std::uint8_t length encoding if the length is 110xxxxx - else if ((length & 0xE0) == 0xC0) - { - if (!get4Byte(address + inStreamOffset, length, Endianness::BIG)) - { - return; - } - - length &= ~0xE0000000; - lengthSize = 4; - } - - // Read only if length is greater than 0 - elementData.clear(); - if (length > 0 && !getXBytes(address + inStreamOffset + lengthSize, length, elementData)) - { - return; - } - - blobStream->addElement(inStreamOffset, elementData); - inStreamOffset += lengthSize + length; - } } /** @@ -2299,14 +2262,8 @@ void PeFormat::parseStringStream(std::uint64_t baseAddress, std::uint64_t offset while (currentOffset < size) { std::string string; - if (!getNTBS(address + currentOffset, string)) - { - currentOffset += 1; - continue; - } - + getNTBS(address + currentOffset, string); stringStream->addString(currentOffset, string); - // +1 for null-terminator currentOffset += 
1 + string.length(); } @@ -3191,7 +3148,45 @@ bool PeFormat::initDllList(const std::string & dllListFile) */ bool PeFormat::isDotNet() const { - return clrHeader != nullptr || metadataHeader != nullptr; + if (!clrHeader || !metadataHeader) { + return false; + } + + std::uint32_t correctHdrSize = 72; + if (clrHeader->getHeaderSize() != correctHdrSize) + { + return false; + } + + std::uint32_t numberOfRvaAndSizes = getImageLoader().getOptionalHeader().NumberOfRvaAndSizes; + // If the binary is 64bit, check NumberOfRvaAndSizes, otherwise don't + if (getImageBitability() == 64) + { + if (numberOfRvaAndSizes < PELIB_IMAGE_DIRECTORY_ENTRY_COM_DESCRIPTOR) + { + return false; + } + } + else if (!isDll()) + { // If 32 bit check if first 2 bytes at entry point are 0xFF 0x25 + + unsigned long long entryAddr = 0; + if (!getEpAddress(entryAddr)) + { + return false; + } + std::uint64_t bytes[2]; + if (!get1Byte(entryAddr, bytes[0]) || !get1Byte(entryAddr + 1, bytes[1])) + { + return false; + } + if (bytes[0] != 0xFF || bytes[1] != 0x25) + { + return false; + } + } + + return true; } /** diff --git a/src/fileformat/types/dotnet_headers/blob_stream.cpp b/src/fileformat/types/dotnet_headers/blob_stream.cpp index 4e33be199..0ef7abf3c 100644 --- a/src/fileformat/types/dotnet_headers/blob_stream.cpp +++ b/src/fileformat/types/dotnet_headers/blob_stream.cpp @@ -5,16 +5,13 @@ */ #include "retdec/fileformat/types/dotnet_headers/blob_stream.h" +#include namespace retdec { namespace fileformat { -/** - * Constructor. - * @param streamOffset Stream offset. - * @param streamSize Stream size. 
- */ -BlobStream::BlobStream(std::uint64_t streamOffset, std::uint64_t streamSize) : Stream(StreamType::Blob, streamOffset, streamSize) +BlobStream::BlobStream(std::vector data, std::uint64_t streamOffset, std::uint64_t streamSize) + : Stream(StreamType::Blob, streamOffset, streamSize), data(std::move(data)) { } @@ -25,22 +22,63 @@ BlobStream::BlobStream(std::uint64_t streamOffset, std::uint64_t streamSize) : S */ std::vector BlobStream::getElement(std::size_t offset) const { - auto itr = elements.find(offset); - if (itr == elements.end()) + // Adapted from YARA + // https://github.com/VirusTotal/yara/blob/v4.1.2/libyara/modules/dotnet/dotnet.c#L130 + std::uint32_t len = 0; + const unsigned char* ptr = data.data() + offset; + if (offset >= data.size()) + { return {}; + } + // ECMA 335 II.24.2.4 + /* Blobs start with their length in big-endian order, + which can be variable in size. We can figure out the + size of the length using the first few bits of the first byte. */ + // If first bit is 0, length is encoded in the first byte + else if ((*ptr & 0x80) == 0x00) + { + len = *ptr; + offset += 1; + if (offset + len <= data.size()) + { + return { data.begin() + offset, data.begin() + offset + len }; + } + } + // If first 2 bits are 10, length is stored in 2 bytes + else if ((*ptr & 0xC0) == 0x80) + { + // Make sure we have one more byte. + if (offset + 1 < data.size()) + { + // Shift remaining 6 bits left by 8 and OR in the remaining byte. + len = ((*ptr & 0x3F) << 8) | *(ptr + 1); + offset += 2; + } + if (offset + len <= data.size()) + { + return { data.begin() + offset, data.begin() + offset + len }; + } + } + // If first 3 bits are 110, length is stored in 4 bytes + else if ((*ptr & 0xE0) == 0xC0) + { + // Make sure we have 3 more bytes. + if (offset + 3 < data.size()) + { + // Shift the remaining 5 bits left by 24 and OR in the following three bytes. 
+ len = ((*ptr & 0x1F) << 24) | + (*(ptr + 1) << 16) | + (*(ptr + 2) << 8) | + *(ptr + 3); + offset += 4; + } + if (offset + len <= data.size()) + { + return { data.begin() + offset, data.begin() + offset + len }; + } + } - return itr->second; + return {}; } - -/** - * Adds new element at the specified offset. - * @param offset Offset of the element. - * @param data Data of the element. - */ -void BlobStream::addElement(std::size_t offset, const std::vector& data) -{ - elements.emplace(offset, data); -} - } // namespace fileformat } // namespace retdec diff --git a/src/fileformat/types/dotnet_types/dotnet_class.cpp b/src/fileformat/types/dotnet_types/dotnet_class.cpp index b5ab50650..271ba12e8 100644 --- a/src/fileformat/types/dotnet_types/dotnet_class.cpp +++ b/src/fileformat/types/dotnet_types/dotnet_class.cpp @@ -6,6 +6,7 @@ #include "retdec/utils/string.h" #include "retdec/fileformat/types/dotnet_types/dotnet_class.h" +#include using namespace retdec::utils; @@ -412,6 +413,18 @@ bool DotnetClass::isSealed() const return sealed; } +bool DotnetClass::isNested() const +{ + const TypeDef* row = getRawTypeDef(); + if (!row) + { + return false; + } + std::uint32_t flags = getRawTypeDef()->flags; + return (flags & TypeVisibilityMask) != TypeNotPublic && + (flags & TypeVisibilityMask) != TypePublic; +} + /** * Adds the field to the class. * @param field Field to add. 
diff --git a/src/fileformat/types/dotnet_types/dotnet_type.cpp b/src/fileformat/types/dotnet_types/dotnet_type.cpp index 8258dbb5b..d1a6ba106 100644 --- a/src/fileformat/types/dotnet_types/dotnet_type.cpp +++ b/src/fileformat/types/dotnet_types/dotnet_type.cpp @@ -19,7 +19,10 @@ const std::unordered_mapisPrivate()) return DotnetTypeVisibility::Private; + else if (source->isInternal()) + return DotnetTypeVisibility::Internal; + else if (source->isFamOrAssem()) + return DotnetTypeVisibility::ProtectedInternal; + else if (source->isFamAndAssem()) + return DotnetTypeVisibility::PrivateProtected; else return DotnetTypeVisibility::Private; } @@ -183,16 +189,21 @@ DotnetTypeVisibility toTypeVisibility(const T* source) template <> DotnetTypeVisibility toTypeVisibility(const TypeDef* source) { - if (source->isPublic() || source->isNestedPublic()) - return DotnetTypeVisibility::Public; + if (source->isNonPublic() || source->isNestedInternal()) + return DotnetTypeVisibility::Internal; else if (source->isNestedProtected()) return DotnetTypeVisibility::Protected; - else if (source->isNonPublic() || source->isNestedPrivate()) + else if (source->isNestedPrivate()) return DotnetTypeVisibility::Private; + else if (source->isNestedFamAndAssem()) + return DotnetTypeVisibility::PrivateProtected; + else if (source->isNestedFamOrAssem()) + return DotnetTypeVisibility::ProtectedInternal; + else if (source->isNestedPublic() || source->isPublic()) + return DotnetTypeVisibility::Public; else return DotnetTypeVisibility::Private; } - } /** @@ -496,10 +507,6 @@ bool DotnetTypeReconstructor::reconstructGenericParameters() */ bool DotnetTypeReconstructor::reconstructMethodParameters() { - auto paramTable = static_cast*>(metadataStream->getMetadataTable(MetadataTableType::Param)); - if (paramTable == nullptr) - return true; - // We need to iterate over classes because we need to know the owner of every single method for (const auto& kv : defClassTable) { @@ -511,7 +518,7 @@ bool 
DotnetTypeReconstructor::reconstructMethodParameters() // Obtain postponed signature // We now know all the information required for method parameters reconstruction auto methodDef = method->getRawRecord(); - auto signature = methodReturnTypeAndParamTypeTable[method.get()]; + auto& signature = methodReturnTypeAndParamTypeTable[method.get()]; // Reconstruct return type auto returnType = dataTypeFromSignature(signature, classType.get(), method.get()); @@ -524,11 +531,7 @@ bool DotnetTypeReconstructor::reconstructMethodParameters() auto startIndex = methodDef->paramList.getIndex(); for (auto i = startIndex; i < startIndex + method->getDeclaredParametersCount(); ++i) { - auto param = paramTable->getRow(i); - if (param == nullptr) - break; - - auto newParam = createMethodParameter(param, classType.get(), method.get(), signature); + auto newParam = createMethodParameter(i, startIndex, classType.get(), method.get(), signature); if (newParam == nullptr) { methodOk = false; @@ -649,14 +652,23 @@ bool DotnetTypeReconstructor::reconstructNestedClasses() auto nestedClass = nestedClassTable->getRow(i); auto nestedItr = defClassTable.find(nestedClass->nestedClass.getIndex()); - if (nestedItr == defClassTable.end()) + // Validate that the type is actually nested + if (nestedItr == defClassTable.end() || !nestedItr->second->isNested()) continue; auto enclosingItr = defClassTable.find(nestedClass->enclosingClass.getIndex()); if (enclosingItr == defClassTable.end()) continue; - nestedItr->second->setNameSpace(enclosingItr->second->getFullyQualifiedName()); + const std::string& namespac = nestedItr->second->getNameSpace(); + if (namespac.empty()) + { + nestedItr->second->setNameSpace(enclosingItr->second->getFullyQualifiedName()); + } + else + { + nestedItr->second->setNameSpace(enclosingItr->second->getFullyQualifiedName() + "." 
+ nestedItr->second->getNameSpace()); + } } return true; @@ -1004,20 +1016,38 @@ std::unique_ptr DotnetTypeReconstructor::createMethod(const Method /** * Creates new method parameter from Param table record. - * @param param Param table record. + * @param paramIdx Index of the current Param record + * @param startIdx Index of the first Param record of the method * @param ownerClass Owning class. * @param ownerMethod Owning method. * @param signature Signature with data types. Is destroyed in the meantime. * @return New method parameter or @c nullptr in case of failure. */ -std::unique_ptr DotnetTypeReconstructor::createMethodParameter(const Param* param, const DotnetClass* ownerClass, +std::unique_ptr DotnetTypeReconstructor::createMethodParameter( + std::size_t paramIdx, std::size_t startIdx, const DotnetClass* ownerClass, const DotnetMethod* ownerMethod, std::vector& signature) { std::string paramName; - if (!stringStream->getString(param->name.getIndex(), paramName)) - return nullptr; - paramName = retdec::utils::replaceNonprintableChars(paramName); + auto paramTable = static_cast*>(metadataStream->getMetadataTable(MetadataTableType::Param)); + const Param* param; + + if (paramTable && (param = paramTable->getRow(paramIdx))) + { + if (!stringStream->getString(param->name.getIndex(), paramName)) { + paramName = retdec::utils::replaceNonprintableChars(paramName); + } + // else leave it empty with just a type + } + // If there is no paramTable, we can still reconstruct everything + // from the signature, except name -> default name + else + { + std::stringstream fmt; + fmt << "P_" << paramIdx - startIdx; + paramName = fmt.str(); + } + auto type = dataTypeFromSignature(signature, ownerClass, ownerMethod); if (type == nullptr) return nullptr; @@ -1162,7 +1192,7 @@ std::unique_ptr DotnetTypeReconstructor::createArray(std::v // Some dimensions can have limited size by declaration // Size 0 means not specified std::uint64_t numOfSizes = decodeUnsigned(data, bytesRead); 
- if (bytesRead == 0) + if (bytesRead == 0 || numOfSizes > rank) return nullptr; data.erase(data.begin(), data.begin() + bytesRead); @@ -1177,7 +1207,7 @@ std::unique_ptr DotnetTypeReconstructor::createArray(std::v // And some dimensions can also be limited by special lower bound std::size_t numOfLowBounds = decodeUnsigned(data, bytesRead); - if (bytesRead == 0) + if (bytesRead == 0 || numOfLowBounds > rank) return nullptr; data.erase(data.begin(), data.begin() + bytesRead); @@ -1430,6 +1460,7 @@ const DotnetClass* DotnetTypeReconstructor::selectClass(const TypeDefOrRef& type result = itr->second.get(); } + // TODO TypeSpec is missing here return result; }