From 53b4e5db21f57a90d79d045e0d4be0d985043553 Mon Sep 17 00:00:00 2001 From: Viraj Shah <42321347+virajbshah@users.noreply.github.com> Date: Mon, 28 Oct 2024 20:36:54 +0530 Subject: [PATCH] Minimize use of `ELF64LEObjectFile` over `ELFObjectFileBase` in `annotating_importer`. (#241) * Minor refactoring of usages of `ELF64LEObjectFile`s to `ELFObjectFileBase`s in `annotating_importer`. * This change is meant to help merge some functionality in `annotating_importer` with `extract_bbs_from_obj_lib`. --- gematria/datasets/annotating_importer.cc | 83 +++++++++++++----------- gematria/datasets/annotating_importer.h | 41 ++++++++++-- 2 files changed, 81 insertions(+), 43 deletions(-) diff --git a/gematria/datasets/annotating_importer.cc b/gematria/datasets/annotating_importer.cc index 21a5f35..9472834 100644 --- a/gematria/datasets/annotating_importer.cc +++ b/gematria/datasets/annotating_importer.cc @@ -101,7 +101,7 @@ absl::Status AnnotatingImporter::LoadBinary(std::string_view file_name) { return absl::OkStatus(); } -absl::StatusOr +absl::StatusOr AnnotatingImporter::GetELFFromBinary() { llvm::object::Binary *binary = owning_binary_.getBinary(); if (!binary->isObject()) { @@ -109,59 +109,32 @@ AnnotatingImporter::GetELFFromBinary() { absl::StrFormat("The given binary (%s) is not an object.", std::string(binary->getFileName()))); } - llvm::object::ObjectFile *object = - llvm::cast(binary); - if (!object) { + auto *object = llvm::cast(binary); + if (object == nullptr) { return absl::InvalidArgumentError( absl::StrFormat("Could not cast the binary (%s) to an ObjectFile.", std::string(binary->getFileName()))); } // Make sure the object is an ELF file. 
- if (!object->isELF() || !object->is64Bit() || !object->isLittleEndian()) { + if (!object->isELF()) { return absl::InvalidArgumentError( - absl::StrFormat("The given object (%s) is not in ELF64LE format.", + absl::StrFormat("The given object (%s) is not in ELF format.", std::string(binary->getFileName()))); } - auto *elf_object = llvm::dyn_cast(object); - if (!elf_object) { + auto *elf_object = llvm::dyn_cast(object); + if (elf_object == nullptr) { return absl::InvalidArgumentError(absl::StrFormat( - "Could not cast the object (%s) to an ELF64LEObjectFile.", + "Could not cast the object (%s) to an ELFObjectFileBase.", std::string(binary->getFileName()))); } return elf_object; } -absl::StatusOr> -AnnotatingImporter::GetMainProgramHeader( - const llvm::object::ELF64LEObjectFile *elf_object) { - llvm::object::Elf_Phdr_Impl main_header; - bool found_main_header = false; - auto program_headers = elf_object->getELFFile().program_headers(); - if (llvm::Error error = program_headers.takeError()) { - return LlvmErrorToStatus(std::move(error)); - } - for (const auto &program_header : *program_headers) { - if (program_header.p_type == llvm::ELF::PT_LOAD && - program_header.p_flags & llvm::ELF::PF_R && - program_header.p_flags & llvm::ELF::PF_X) { - if (found_main_header) { - return absl::InvalidArgumentError( - "The given object has multiple executable segments. 
This is " - "currently not supported."); - } - main_header = program_header; - found_main_header = true; - } - } - - return main_header; -} - absl::StatusOr> AnnotatingImporter::GetELFSlice( - const llvm::object::ELF64LEObjectFile *elf_object, uint64_t range_begin, + const llvm::object::ELFObjectFileBase *elf_object, uint64_t range_begin, uint64_t range_end, uint64_t file_offset) { llvm::StringRef binary_buf = elf_object->getData(); @@ -195,7 +168,24 @@ AnnotatingImporter::GetBlocksFromELF() { if (llvm::Error error = bb_addr_map.takeError()) { return LlvmErrorToStatus(std::move(error)); } - const auto main_header = GetMainProgramHeader(*elf_object); + + // TODO(vbshah): Consider making it possible to use other ELFTs rather than + // only ELF64LE since only the implementation of GetMainProgramHeader differs + // between different ELFTs. + if (!(*elf_object)->is64Bit() || !(*elf_object)->isLittleEndian()) { + return absl::InvalidArgumentError( + absl::StrFormat("The given object (%s) is not in ELF64LE format.", + (*elf_object)->getFileName())); + } + auto *typed_elf_object = + llvm::dyn_cast(*elf_object); + if (typed_elf_object == nullptr) { + return absl::InvalidArgumentError(absl::StrFormat( + "Could not cast the ELF object (%s) to an ELF64LEObjectFileBase.", + (*elf_object)->getFileName())); + } + + const auto main_header = GetMainProgramHeader(typed_elf_object); if (!main_header.ok()) { return main_header.status(); } @@ -296,7 +286,24 @@ AnnotatingImporter::GetLBRBlocksWithLatency() { if (!elf_object.ok()) { return elf_object.status(); } - const auto main_header = GetMainProgramHeader(*elf_object); + + // TODO(vbshah): Consider making it possible to use other ELFTs rather than + // only ELF64LE since only the implementation of GetMainProgramHeader differs + // between different ELFTs. 
+ if (!(*elf_object)->is64Bit() || !(*elf_object)->isLittleEndian()) { + return absl::InvalidArgumentError( + absl::StrFormat("The given object (%s) is not in ELF64LE format.", + (*elf_object)->getFileName())); + } + auto *typed_elf_object = + llvm::dyn_cast(*elf_object); + if (typed_elf_object == nullptr) { + return absl::InvalidArgumentError(absl::StrFormat( + "Could not cast the ELF object (%s) to an ELF64LEObjectFileBase.", + (*elf_object)->getFileName())); + } + + const auto main_header = GetMainProgramHeader(typed_elf_object); if (!main_header.ok()) { return main_header.status(); } diff --git a/gematria/datasets/annotating_importer.h b/gematria/datasets/annotating_importer.h index a000b53..39ed465 100644 --- a/gematria/datasets/annotating_importer.h +++ b/gematria/datasets/annotating_importer.h @@ -29,10 +29,13 @@ #include "gematria/datasets/bhive_importer.h" #include "gematria/llvm/canonicalizer.h" #include "gematria/llvm/disassembler.h" +#include "gematria/llvm/llvm_to_absl.h" #include "gematria/proto/throughput.pb.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/Object/Binary.h" #include "llvm/Object/ELFObjectFile.h" #include "llvm/Object/ELFTypes.h" +#include "llvm/Support/Error.h" #include "quipper/perf_data.pb.h" #include "quipper/perf_parser.h" #include "quipper/perf_reader.h" @@ -67,16 +70,17 @@ class AnnotatingImporter { // Returns a pointer inside the loaded binary casted down to an ELF object. // The pointer is owned by this instance of `AnnotatingImporter` and may only // be accessed while this is alive. - absl::StatusOr GetELFFromBinary(); + absl::StatusOr GetELFFromBinary(); - // Returns the file offset of the passed ELF object. - absl::StatusOr> - GetMainProgramHeader(const llvm::object::ELF64LEObjectFile* elf_object); + // Returns the program header corresponding to the main executable segment.
+ template + absl::StatusOr> GetMainProgramHeader( + const llvm::object::ELFObjectFile* elf_object); // Disassembles and returns instructions between two addresses in an ELF // object. absl::StatusOr> GetELFSlice( - const llvm::object::ELF64LEObjectFile* elf_object, uint64_t range_begin, + const llvm::object::ELFObjectFileBase* elf_object, uint64_t range_begin, uint64_t range_end, uint64_t file_offset); // Extracts basic blocks from an ELF object, and returns them as tuple @@ -112,6 +116,33 @@ class AnnotatingImporter { llvm::object::OwningBinary owning_binary_; }; +template +absl::StatusOr> +AnnotatingImporter::GetMainProgramHeader( + const llvm::object::ELFObjectFile* elf_object) { + llvm::object::Elf_Phdr_Impl main_header; + bool found_main_header = false; + auto program_headers = elf_object->getELFFile().program_headers(); + if (llvm::Error error = program_headers.takeError()) { + return LlvmErrorToStatus(std::move(error)); + } + for (const auto& program_header : *program_headers) { + if (program_header.p_type == llvm::ELF::PT_LOAD && + program_header.p_flags & llvm::ELF::PF_R && + program_header.p_flags & llvm::ELF::PF_X) { + if (found_main_header) { + return absl::InvalidArgumentError( + "The given object has multiple executable segments. This is " + "currently not supported."); + } + main_header = program_header; + found_main_header = true; + } + } + + return main_header; +} + } // namespace gematria #endif // THIRD_PARTY_GEMATRIA_GEMATRIA_DATASETS_ANNOTATING_IMPORTER_H_