Skip to content

Commit

Permalink
Address comments from teresajohnson
Browse files Browse the repository at this point in the history
  • Loading branch information
kyulee-com committed Oct 5, 2024
1 parent adc6c58 commit d386291
Show file tree
Hide file tree
Showing 11 changed files with 452 additions and 102 deletions.
9 changes: 5 additions & 4 deletions clang/lib/CodeGen/BackendUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1321,10 +1321,11 @@ static void runThinLTOBackend(
Conf.CGFileType = getCodeGenFileType(Action);
break;
}
if (Error E = thinBackend(
Conf, -1, AddStream, *M, *CombinedIndex, ImportList,
ModuleToDefinedGVSummaries[M->getModuleIdentifier()],
/* ModuleMap */ nullptr, Conf.CodeGenOnly, CGOpts.CmdArgs)) {
if (Error E =
thinBackend(Conf, -1, AddStream, *M, *CombinedIndex, ImportList,
ModuleToDefinedGVSummaries[M->getModuleIdentifier()],
/*ModuleMap=*/nullptr, Conf.CodeGenOnly,
/*IRAddStream=*/nullptr, CGOpts.CmdArgs)) {
handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) {
errs() << "Error running ThinLTO backend: " << EIB.message() << '\n';
});
Expand Down
52 changes: 46 additions & 6 deletions llvm/include/llvm/CGData/CodeGenData.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,13 @@
#define LLVM_CGDATA_CODEGENDATA_H

#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/ADT/StableHashing.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/CGData/OutlinedHashTree.h"
#include "llvm/CGData/OutlinedHashTreeRecord.h"
#include "llvm/IR/Module.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Caching.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/TargetParser/Triple.h"
#include <mutex>
Expand Down Expand Up @@ -164,22 +166,60 @@ publishOutlinedHashTree(std::unique_ptr<OutlinedHashTree> HashTree) {
CodeGenData::getInstance().publishOutlinedHashTree(std::move(HashTree));
}

void initializeTwoCodegenRounds();
/// Holds the in-memory stream buffers and the optional file cache used to
/// collect serialized outputs. Outputs and Files are both sized by the
/// constructor and are indexed by the same slot number.
struct StreamCacheData {
  /// Backing buffer for serialized data stream.
  SmallVector<SmallString<0>> Outputs;
  /// Callback function to add serialized data to the stream.
  AddStreamFn AddStream;
  /// Backing buffer for cached data.
  SmallVector<std::unique_ptr<MemoryBuffer>> Files;
  /// Cache mechanism for storing and retrieving data.
  FileCache Cache;

  /// Create \p Size empty slots in both Outputs and Files.
  StreamCacheData(unsigned Size) : Outputs(Size), Files(Size) {}
  StreamCacheData() = delete;

  /// Retrieve results from either the cache or the stream.
  ///
  /// For every slot, the buffer stored in Files is preferred when it is
  /// non-null; otherwise the corresponding entry of Outputs is used. The
  /// returned StringRefs refer to storage held by this object and do not copy
  /// the data. This accessor only reads members, so it is const-qualified.
  SmallVector<StringRef> getResult() const {
    const unsigned NumOutputs = Outputs.size();
    SmallVector<StringRef> Result(NumOutputs);
    for (unsigned I = 0; I < NumOutputs; ++I) {
      if (Files[I])
        Result[I] = Files[I]->getBuffer();
      else
        Result[I] = Outputs[I];
    }
    return Result;
  }
};

/// Establish additional streams and caches for accessing object and IR files.
/// \p OrigCache refers to the original cache used for accessing the final
/// object files, which has already been configured and provided by the linker,
/// if applicable. This cache will be utilized during the second round of the
/// run. Additionally, we add two more caches at the same location for the first
/// round of the run.
void initializeTwoCodegenRounds(StreamCacheData &CG, StreamCacheData &IR,
const FileCache &OrigCache);

/// Save \p TheModule before the first codegen round.
/// \p Task represents the partition number in the parallel code generation
/// process.
void saveModuleForTwoRounds(const Module &TheModule, unsigned Task);
/// \p AddStream is the callback used to add the serialized module to the
/// stream.
void saveModuleForTwoRounds(const Module &TheModule, unsigned Task,
AddStreamFn AddStream);

/// Load the optimized module before the second codegen round.
std::unique_ptr<Module> loadModuleForTwoRounds(BitcodeModule &OrigModule,
unsigned Task,
LLVMContext &Context);
LLVMContext &Context,
ArrayRef<StringRef> IRFiles);

/// Merge the codegen data from the input files in scratch vector in ThinLTO
/// two-codegen rounds.
Error mergeCodeGenData(
const std::unique_ptr<std::vector<llvm::SmallString<0>>> InputFiles);
/// two-codegen rounds. Optionally, \p CombinedHash can be used to compute
/// the combined hash of the merged data.
Error mergeCodeGenData(ArrayRef<StringRef> CGFiles,
stable_hash *CombinedHash = nullptr);

void warn(Error E, StringRef Whence = "");
void warn(Twine Message, std::string Whence = "", std::string Hint = "");
Expand Down
5 changes: 4 additions & 1 deletion llvm/include/llvm/CGData/CodeGenDataReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,11 @@ class CodeGenDataReader {
/// Extract the cgdata embedded in sections from the given object file and
/// merge them into the GlobalOutlineRecord. This is a static helper that
/// is used by `llvm-cgdata --merge` or ThinLTO's two-codegen rounds.
/// Optionally, \p CombinedHash can be used to compute the combined hash of
/// the merged data.
static Error mergeFromObjectFile(const object::ObjectFile *Obj,
OutlinedHashTreeRecord &GlobalOutlineRecord);
OutlinedHashTreeRecord &GlobalOutlineRecord,
stable_hash *CombinedHash = nullptr);

protected:
/// The outlined hash tree that has been read. When it's released by
Expand Down
6 changes: 4 additions & 2 deletions llvm/include/llvm/LTO/LTO.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,15 +64,17 @@ void thinLTOInternalizeAndPromoteInIndex(
isPrevailing);

/// Computes a unique hash for the Module considering the current list of
/// export/import and other global analysis results.
/// export/import and other global analysis results. Optionally, \p ExtraID
/// can be used to add an extra identifier to the hash.
std::string computeLTOCacheKey(
const lto::Config &Conf, const ModuleSummaryIndex &Index,
StringRef ModuleID, const FunctionImporter::ImportMapTy &ImportList,
const FunctionImporter::ExportSetTy &ExportList,
const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
const GVSummaryMapTy &DefinedGlobals,
const DenseSet<GlobalValue::GUID> &CfiFunctionDefs = {},
const DenseSet<GlobalValue::GUID> &CfiFunctionDecls = {});
const DenseSet<GlobalValue::GUID> &CfiFunctionDecls = {},
StringRef ExtraID = {});

namespace lto {

Expand Down
6 changes: 4 additions & 2 deletions llvm/include/llvm/LTO/LTOBackend.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,13 +51,15 @@ Error backend(const Config &C, AddStreamFn AddStream,
/// are saved in the ModuleMap. If \p ModuleMap is nullptr, module files will
/// be mapped to memory on demand and at any given time during importing, only
/// one source module will be kept open at the most. If \p CodeGenOnly is true,
/// the backend will skip optimization and only perform code generation.
/// the backend will skip optimization and only perform code generation. If
/// \p IRAddStream is not nullptr, it will be called just before code generation
/// to serialize the optimized IR.
Error thinBackend(const Config &C, unsigned Task, AddStreamFn AddStream,
Module &M, const ModuleSummaryIndex &CombinedIndex,
const FunctionImporter::ImportMapTy &ImportList,
const GVSummaryMapTy &DefinedGlobals,
MapVector<StringRef, BitcodeModule> *ModuleMap,
bool CodeGenOnly,
bool CodeGenOnly, AddStreamFn IRAddStream = nullptr,
const std::vector<uint8_t> &CmdArgs = std::vector<uint8_t>());

Error finalizeOptimizationRemarks(
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/CGData/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ add_llvm_component_library(LLVMCGData
intrinsics_gen

LINK_COMPONENTS
BitReader
BitWriter
Core
Support
Object
Expand Down
99 changes: 58 additions & 41 deletions llvm/lib/CGData/CodeGenData.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "llvm/CGData/CodeGenDataReader.h"
#include "llvm/CGData/OutlinedHashTreeRecord.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Caching.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
Expand All @@ -37,9 +38,6 @@ cl::opt<bool> CodeGenDataThinLTOTwoRounds(
"emits codegen data, while the second round uses the emitted "
"codegen data for further optimizations."));

// Path to where the optimized bitcodes are saved and restored for ThinLTO.
static SmallString<128> CodeGenDataThinLTOTwoRoundsPath;

static std::string getCGDataErrString(cgdata_error Err,
const std::string &ErrMsg = "") {
std::string Msg;
Expand Down Expand Up @@ -224,59 +222,78 @@ void warn(Error E, StringRef Whence) {
}
}

static std::string getPath(StringRef Dir, unsigned Task) {
llvm::SmallString<128> Path(Dir);
llvm::sys::path::append(Path, llvm::Twine(Task) + ".saved_copy.bc");
return std::string(Path);
}

void initializeTwoCodegenRounds() {
void initializeTwoCodegenRounds(StreamCacheData &CG, StreamCacheData &IR,
const FileCache &OrigCache) {
assert(CodeGenDataThinLTOTwoRounds);
if (auto EC = llvm::sys::fs::createUniqueDirectory(
"cgdata", CodeGenDataThinLTOTwoRoundsPath))
report_fatal_error(Twine("Failed to create directory: ") + EC.message());
CG.AddStream = [&](size_t Task, const Twine &ModuleName) {
return std::make_unique<CachedFileStream>(
std::make_unique<raw_svector_ostream>(CG.Outputs[Task]));
};
IR.AddStream = [&](size_t Task, const Twine &ModuleName) {
return std::make_unique<CachedFileStream>(
std::make_unique<raw_svector_ostream>(IR.Outputs[Task]));
};

if (OrigCache.isValid()) {
auto CGCacheOrErr =
localCache("ThinLTO", "CG", OrigCache.getCacheDirectoryPath(),
[&](size_t Task, const Twine &ModuleName,
std::unique_ptr<MemoryBuffer> MB) {
CG.Files[Task] = std::move(MB);
});
if (Error Err = CGCacheOrErr.takeError())
report_fatal_error(std::move(Err));
CG.Cache = std::move(*CGCacheOrErr);
auto IRCacheOrErr =
localCache("ThinLTO", "IR", OrigCache.getCacheDirectoryPath(),
[&](size_t Task, const Twine &NoduleName,
std::unique_ptr<MemoryBuffer> MB) {
IR.Files[Task] = std::move(MB);
});
if (Error Err = IRCacheOrErr.takeError())
report_fatal_error(std::move(Err));
IR.Cache = std::move(*IRCacheOrErr);
}
}

void saveModuleForTwoRounds(const Module &TheModule, unsigned Task) {
assert(sys::fs::is_directory(CodeGenDataThinLTOTwoRoundsPath));
std::string Path = getPath(CodeGenDataThinLTOTwoRoundsPath, Task);
std::error_code EC;
raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::OF_None);
if (EC)
report_fatal_error(Twine("Failed to open ") + Path +
" to save optimized bitcode: " + EC.message());
WriteBitcodeToFile(TheModule, OS, /*ShouldPreserveUseListOrder=*/true);
void saveModuleForTwoRounds(const Module &TheModule, unsigned Task,
AddStreamFn AddStream) {
LLVM_DEBUG(dbgs() << "Saving module: " << TheModule.getModuleIdentifier()
<< " in Task " << Task << "\n");
Expected<std::unique_ptr<CachedFileStream>> StreamOrErr =
AddStream(Task, TheModule.getModuleIdentifier());
if (Error Err = StreamOrErr.takeError())
report_fatal_error(std::move(Err));
std::unique_ptr<CachedFileStream> &Stream = *StreamOrErr;

WriteBitcodeToFile(TheModule, *Stream->OS,
/*ShouldPreserveUseListOrder=*/true);
}

std::unique_ptr<Module> loadModuleForTwoRounds(BitcodeModule &OrigModule,
unsigned Task,
LLVMContext &Context) {
assert(sys::fs::is_directory(CodeGenDataThinLTOTwoRoundsPath));
std::string Path = getPath(CodeGenDataThinLTOTwoRoundsPath, Task);
auto FileOrError = MemoryBuffer::getFile(Path);
if (auto EC = FileOrError.getError())
report_fatal_error(Twine("Failed to open ") + Path +
" to load optimized bitcode: " + EC.message());

std::unique_ptr<MemoryBuffer> FileBuffer = std::move(*FileOrError);
LLVMContext &Context,
ArrayRef<StringRef> IRFiles) {
LLVM_DEBUG(dbgs() << "Loading module: " << OrigModule.getModuleIdentifier()
<< " in Task " << Task << "\n");
std::unique_ptr<MemoryBuffer> FileBuffer = MemoryBuffer::getMemBuffer(
IRFiles[Task], "in-memory IR file", /*RequiresNullTerminator=*/false);
auto RestoredModule = parseBitcodeFile(*FileBuffer, Context);
if (!RestoredModule)
report_fatal_error(Twine("Failed to parse optimized bitcode loaded from ") +
Path + "\n");
report_fatal_error(
Twine("Failed to parse optimized bitcode loaded for Task: ") +
Twine(Task) + "\n");

// Restore the original module identifier.
(*RestoredModule)->setModuleIdentifier(OrigModule.getModuleIdentifier());
return std::move(*RestoredModule);
}

Error mergeCodeGenData(
const std::unique_ptr<std::vector<llvm::SmallString<0>>> InputFiles) {

Error mergeCodeGenData(ArrayRef<StringRef> CGFiles, stable_hash *CombinedHash) {
OutlinedHashTreeRecord GlobalOutlineRecord;
for (auto &InputFile : *(InputFiles)) {
if (InputFile.empty())
for (auto File : CGFiles) {
if (File.empty())
continue;
StringRef File = StringRef(InputFile.data(), InputFile.size());
std::unique_ptr<MemoryBuffer> Buffer = MemoryBuffer::getMemBuffer(
File, "in-memory object file", /*RequiresNullTerminator=*/false);
Expected<std::unique_ptr<object::ObjectFile>> BinOrErr =
Expand All @@ -285,8 +302,8 @@ Error mergeCodeGenData(
return BinOrErr.takeError();

std::unique_ptr<object::ObjectFile> &Obj = BinOrErr.get();
if (auto E = CodeGenDataReader::mergeFromObjectFile(Obj.get(),
GlobalOutlineRecord))
if (auto E = CodeGenDataReader::mergeFromObjectFile(
Obj.get(), GlobalOutlineRecord, CombinedHash))
return E;
}

Expand Down
7 changes: 5 additions & 2 deletions llvm/lib/CGData/CodeGenDataReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ setupMemoryBuffer(const Twine &Filename, vfs::FileSystem &FS) {
}

Error CodeGenDataReader::mergeFromObjectFile(
const object::ObjectFile *Obj,
OutlinedHashTreeRecord &GlobalOutlineRecord) {
const object::ObjectFile *Obj, OutlinedHashTreeRecord &GlobalOutlineRecord,
stable_hash *CombinedHash) {
Triple TT = Obj->makeTriple();
auto CGOutLineName =
getCodeGenDataSectionName(CG_outline, TT.getObjectFormat(), false);
Expand All @@ -48,6 +48,9 @@ Error CodeGenDataReader::mergeFromObjectFile(
auto *EndData = Data + ContentsOrErr->size();

if (*NameOrErr == CGOutLineName) {
if (CombinedHash)
*CombinedHash =
stable_hash_combine(*CombinedHash, xxh3_64bits(*ContentsOrErr));
// In case dealing with an executable that has concatenated cgdata,
// we want to merge them into a single cgdata.
// Although it's not a typical workflow, we support this scenario.
Expand Down
Loading

0 comments on commit d386291

Please sign in to comment.