Skip to content

Commit

Permalink
[CGData][ThinLTO] Global Outlining with Two-CodeGen Rounds
Browse files Browse the repository at this point in the history
  • Loading branch information
kyulee-com committed Sep 16, 2024
1 parent c1a0219 commit a566ab0
Show file tree
Hide file tree
Showing 7 changed files with 302 additions and 6 deletions.
16 changes: 16 additions & 0 deletions llvm/include/llvm/CGData/CodeGenData.h
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,22 @@ publishOutlinedHashTree(std::unique_ptr<OutlinedHashTree> HashTree) {
CodeGenData::getInstance().publishOutlinedHashTree(std::move(HashTree));
}

/// Initialize the two-codegen rounds by creating a unique scratch directory
/// in which the optimized bitcode of each task is saved and later reloaded.
/// Must be called before saveModuleForTwoRounds() and
/// loadModuleForTwoRounds(); failing to create the directory is fatal.
void initializeTwoCodegenRounds();

/// Save the current module before the first codegen round.
/// \p TheModule is written as bitcode into the scratch directory, in a file
/// named after \p Task so that the second round can locate it again.
/// Failing to open the destination file is fatal.
void saveModuleForTwoRounds(const Module &TheModule, unsigned Task);

/// Load the current module before the second codegen round.
/// The bitcode previously saved for \p Task is parsed into \p Context, and the
/// module identifier of \p OrigModule is set on the returned module.
/// Failing to read or parse the saved bitcode is fatal.
std::unique_ptr<Module> loadModuleForTwoRounds(BitcodeModule &OrigModule,
unsigned Task,
LLVMContext &Context);

/// Merge the codegen data from the input files in scratch vector in ThinLTO
/// two-codegen rounds.
/// Every non-empty entry of \p InputFiles is treated as an in-memory object
/// file. If the merged outlined hash tree record is not empty, its hash tree
/// is published as the global one.
/// \returns an error if an entry cannot be opened as an object file or its
/// codegen data cannot be merged; success otherwise.
Error mergeCodeGenData(
const std::unique_ptr<std::vector<llvm::SmallString<0>>> InputFiles);

void warn(Error E, StringRef Whence = "");
void warn(Twine Message, std::string Whence = "", std::string Hint = "");

Expand Down
81 changes: 80 additions & 1 deletion llvm/lib/CGData/CodeGenData.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/WithColor.h"

#define DEBUG_TYPE "cg-data"
Expand All @@ -30,6 +31,14 @@ cl::opt<bool>
cl::opt<std::string>
CodeGenDataUsePath("codegen-data-use-path", cl::init(""), cl::Hidden,
cl::desc("File path to where .cgdata file is read"));
// Hidden flag that turns on the ThinLTO two-round code generation. When it is
// set, the CodeGenData instance is created with EmitCGData enabled. The option
// is intentionally not `static`: the LTO sources refer to it through an
// `extern` declaration.
cl::opt<bool> CodeGenDataThinLTOTwoRounds(
"codegen-data-thinlto-two-rounds", cl::init(false), cl::Hidden,
cl::desc("Enable two-round ThinLTO code generation. The first round "
"emits codegen data, while the second round uses the emitted "
"codegen data for further optimizations."));

// Path to where the optimized bitcodes are saved and restored for ThinLTO.
// Filled in by initializeTwoCodegenRounds(), which creates the directory.
static SmallString<128> CodeGenDataThinLTOTwoRoundsPath;

static std::string getCGDataErrString(cgdata_error Err,
const std::string &ErrMsg = "") {
Expand Down Expand Up @@ -139,7 +148,7 @@ CodeGenData &CodeGenData::getInstance() {
std::call_once(CodeGenData::OnceFlag, []() {
Instance = std::unique_ptr<CodeGenData>(new CodeGenData());

if (CodeGenDataGenerate)
if (CodeGenDataGenerate || CodeGenDataThinLTOTwoRounds)
Instance->EmitCGData = true;
else if (!CodeGenDataUsePath.empty()) {
// Initialize the global CGData if the input file name is given.
Expand Down Expand Up @@ -215,6 +224,76 @@ void warn(Error E, StringRef Whence) {
}
}

/// Build the path of the file that holds the saved bitcode of \p Task inside
/// \p Dir, i.e. the file `<Task>.saved_copy.bc` directly under \p Dir.
///
/// The components are joined with sys::path::append so that the separator
/// handling is left to the path library instead of a hard-coded "/".
static std::string getPath(StringRef Dir, unsigned Task) {
  llvm::SmallString<128> Path(Dir);
  llvm::sys::path::append(Path, llvm::Twine(Task) + ".saved_copy.bc");
  return std::string(Path);
}

/// Prepare the scratch location used by the ThinLTO two-codegen rounds.
///
/// Creates a uniquely named directory with the "cgdata" prefix; its path is
/// stored in CodeGenDataThinLTOTwoRoundsPath. The two-rounds option must be
/// enabled. A failure to create the directory is reported as a fatal error.
void initializeTwoCodegenRounds() {
  assert(CodeGenDataThinLTOTwoRounds);
  std::error_code CreateErr = llvm::sys::fs::createUniqueDirectory(
      "cgdata", CodeGenDataThinLTOTwoRoundsPath);
  if (!CreateErr)
    return;
  report_fatal_error(Twine("Failed to create directory: ") +
                     CreateErr.message());
}

void saveModuleForTwoRounds(const Module &TheModule, unsigned Task) {
assert(sys::fs::is_directory(CodeGenDataThinLTOTwoRoundsPath));
std::string Path = getPath(CodeGenDataThinLTOTwoRoundsPath, Task);
std::error_code EC;
raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::OF_None);
if (EC)
report_fatal_error(Twine("Failed to open ") + Path +
" to save optimized bitcode: " + EC.message());
WriteBitcodeToFile(TheModule, OS, /* ShouldPreserveUseListOrder */ true);
}

/// Reload the bitcode that saveModuleForTwoRounds() stored for \p Task and
/// parse it into \p Context.
///
/// \param OrigModule  Supplies the module identifier that is set on the
///                    restored module.
/// \param Task        Selects which saved file to read.
/// \param Context     The context the restored module is created in.
/// \returns the restored module. Failing to read or parse the saved file is
/// reported as a fatal error; the parse error text is included in the message.
std::unique_ptr<Module> loadModuleForTwoRounds(BitcodeModule &OrigModule,
                                               unsigned Task,
                                               LLVMContext &Context) {
  assert(sys::fs::is_directory(CodeGenDataThinLTOTwoRoundsPath));
  std::string Path = getPath(CodeGenDataThinLTOTwoRoundsPath, Task);
  auto FileOrError = MemoryBuffer::getFile(Path);
  if (auto EC = FileOrError.getError())
    report_fatal_error(Twine("Failed to open ") + Path +
                       " to load optimized bitcode: " + EC.message());

  std::unique_ptr<MemoryBuffer> FileBuffer = std::move(*FileOrError);
  auto RestoredModule = llvm::parseBitcodeFile(*FileBuffer, Context);
  if (!RestoredModule) {
    // Take the Error out of the Expected so it is consumed, and append its
    // text so the fatal message says why the bitcode could not be parsed.
    report_fatal_error(Twine("Failed to parse optimized bitcode loaded from ") +
                       Path + ": " + toString(RestoredModule.takeError()));
  }

  // Restore the original module identifier.
  (*RestoredModule)->setModuleIdentifier(OrigModule.getModuleIdentifier());
  return std::move(*RestoredModule);
}

/// Combine the codegen data found in the scratch outputs of the first codegen
/// round and publish the result.
///
/// \param InputFiles  One buffer per entry; each non-empty buffer is read as
///                    an in-memory object file.
/// \returns the first error hit while opening an object file or merging its
/// codegen data, or success.
Error mergeCodeGenData(
    const std::unique_ptr<std::vector<llvm::SmallString<0>>> InputFiles) {
  OutlinedHashTreeRecord MergedRecord;

  for (auto &ObjectBytes : *InputFiles) {
    // An empty buffer has nothing to contribute.
    if (ObjectBytes.empty())
      continue;

    // Present the scratch bytes as a memory buffer for the object reader.
    StringRef Contents(ObjectBytes.data(), ObjectBytes.size());
    std::unique_ptr<MemoryBuffer> MemBuf = MemoryBuffer::getMemBuffer(
        Contents, "in-memory object file", /*RequiresNullTerminator=*/false);

    Expected<std::unique_ptr<object::ObjectFile>> ObjOrErr =
        object::ObjectFile::createObjectFile(MemBuf->getMemBufferRef());
    if (!ObjOrErr)
      return ObjOrErr.takeError();

    if (auto MergeErr = CodeGenDataReader::mergeFromObjectFile(
            ObjOrErr->get(), MergedRecord))
      return MergeErr;
  }

  // Nothing was collected: leave the published hash tree untouched.
  if (MergedRecord.empty())
    return Error::success();

  cgdata::publishOutlinedHashTree(std::move(MergedRecord.HashTree));
  return Error::success();
}

} // end namespace cgdata

} // end namespace llvm
1 change: 1 addition & 0 deletions llvm/lib/LTO/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ add_llvm_component_library(LLVMLTO
BinaryFormat
BitReader
BitWriter
CGData
CodeGen
CodeGenTypes
Core
Expand Down
103 changes: 98 additions & 5 deletions llvm/lib/LTO/LTO.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/CGData/CodeGenData.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/AutoUpgrade.h"
Expand Down Expand Up @@ -70,6 +71,8 @@ static cl::opt<bool>
DumpThinCGSCCs("dump-thin-cg-sccs", cl::init(false), cl::Hidden,
cl::desc("Dump the SCCs in the ThinLTO index's callgraph"));

extern cl::opt<bool> CodeGenDataThinLTOTwoRounds;

namespace llvm {
/// Enable global value internalization in LTO.
cl::opt<bool> EnableLTOInternalization(
Expand Down Expand Up @@ -1458,7 +1461,7 @@ class InProcessThinBackend : public ThinBackendProc {
GlobalValue::getGUID(GlobalValue::dropLLVMManglingEscape(Name)));
}

Error runThinLTOBackendThread(
virtual Error runThinLTOBackendThread(
AddStreamFn AddStream, FileCache Cache, unsigned Task, BitcodeModule BM,
ModuleSummaryIndex &CombinedIndex,
const FunctionImporter::ImportMapTy &ImportList,
Expand Down Expand Up @@ -1559,6 +1562,60 @@ class InProcessThinBackend : public ThinBackendProc {
return BackendThreadPool.getMaxConcurrency();
}
};

/// This Backend will run ThinBackend process but throw away all the output from
/// the codegen. This class facilitates the first codegen round.
class NoOutputThinBackend : public InProcessThinBackend {
public:
NoOutputThinBackend(
const Config &Conf, ModuleSummaryIndex &CombinedIndex,
ThreadPoolStrategy ThinLTOParallelism,
const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
std::unique_ptr<std::vector<llvm::SmallString<0>>> Scratch)
: InProcessThinBackend(
Conf, CombinedIndex, ThinLTOParallelism, ModuleToDefinedGVSummaries,
// Allocate a scratch buffer for each task to write output to.
[Allocation = &*Scratch](unsigned Task, const Twine &ModuleName) {
return std::make_unique<CachedFileStream>(
std::make_unique<raw_svector_ostream>((*Allocation)[Task]));
},
FileCache(), nullptr, false, false),
Scratch(std::move(Scratch)) {}

/// Scratch space for writing output during the codegen.
std::unique_ptr<std::vector<llvm::SmallString<0>>> Scratch;
};

/// This Backend performs codegen on bitcode that was previously saved after
/// going through optimization. This class facilitates the second codegen round.
class OptimizedBitcodeThinBackend : public InProcessThinBackend {
public:
OptimizedBitcodeThinBackend(
const Config &Conf, ModuleSummaryIndex &CombinedIndex,
ThreadPoolStrategy ThinLTOParallelism,
const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
AddStreamFn AddStream)
: InProcessThinBackend(Conf, CombinedIndex, ThinLTOParallelism,
ModuleToDefinedGVSummaries, AddStream, FileCache(),
nullptr, false, false) {}

virtual Error runThinLTOBackendThread(
AddStreamFn AddStream, FileCache Cache, unsigned Task, BitcodeModule BM,
ModuleSummaryIndex &CombinedIndex,
const FunctionImporter::ImportMapTy &ImportList,
const FunctionImporter::ExportSetTy &ExportList,
const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
const GVSummaryMapTy &DefinedGlobals,
MapVector<StringRef, BitcodeModule> &ModuleMap) override {
LTOLLVMContext BackendContext(Conf);
std::unique_ptr<Module> LoadedModule =
cgdata::loadModuleForTwoRounds(BM, Task, BackendContext);

return thinBackend(Conf, Task, AddStream, *LoadedModule, CombinedIndex,
ImportList, DefinedGlobals, &ModuleMap,
/*CodeGenOnly=*/true);
}
};
} // end anonymous namespace

ThinBackend lto::createInProcessThinBackend(ThreadPoolStrategy Parallelism,
Expand Down Expand Up @@ -1879,10 +1936,46 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache,
return BackendProcess->wait();
};

std::unique_ptr<ThinBackendProc> BackendProc =
ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries,
AddStream, Cache);
return RunBackends(BackendProc.get());
if (!CodeGenDataThinLTOTwoRounds) {
std::unique_ptr<ThinBackendProc> BackendProc =
ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries,
AddStream, Cache);
return RunBackends(BackendProc.get());
}

// Perform two rounds of code generation for ThinLTO:
// 1. First round: Run optimization and code generation with a scratch output.
// 2. Merge codegen data extracted from the scratch output.
// 3. Second round: Run code generation again using the merged data.
LLVM_DEBUG(dbgs() << "Running ThinLTO two-codegen rounds\n");

// Initialize a temporary path to store and retrieve optimized IRs for
// two-round code generation.
cgdata::initializeTwoCodegenRounds();

// Create a scratch output to hold intermediate results.
auto Outputs =
std::make_unique<std::vector<llvm::SmallString<0>>>(getMaxTasks());
auto FirstRoundLTO = std::make_unique<NoOutputThinBackend>(
Conf, ThinLTO.CombinedIndex, llvm::heavyweight_hardware_concurrency(),
ModuleToDefinedGVSummaries, std::move(Outputs));
// First round: Run optimization and code generation with a scratch output.
// Before code generation, serialize modules.
if (Error E = RunBackends(FirstRoundLTO.get()))
return E;

// Merge codegen data extracted from the scratch output.
if (Error E = cgdata::mergeCodeGenData(std::move(FirstRoundLTO->Scratch)))
return E;

// Second round: Run code generation by reading IRs.
std::unique_ptr<ThinBackendProc> SecondRoundLTO =
std::make_unique<OptimizedBitcodeThinBackend>(
Conf, ThinLTO.CombinedIndex, llvm::heavyweight_hardware_concurrency(),
ModuleToDefinedGVSummaries, AddStream);
Error E = RunBackends(SecondRoundLTO.get());

return E;
}

Expected<std::unique_ptr<ToolOutputFile>> lto::setupLLVMOptimizationRemarks(
Expand Down
11 changes: 11 additions & 0 deletions llvm/lib/LTO/LTOBackend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/CGData/CodeGenData.h"
#include "llvm/IR/LLVMRemarkStreamer.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/PassManager.h"
Expand Down Expand Up @@ -74,6 +75,8 @@ static cl::opt<bool> ThinLTOAssumeMerged(
cl::desc("Assume the input has already undergone ThinLTO function "
"importing and the other pre-optimization pipeline changes."));

extern cl::opt<bool> CodeGenDataThinLTOTwoRounds;

namespace llvm {
extern cl::opt<bool> NoPGOWarnMismatch;
}
Expand Down Expand Up @@ -599,11 +602,19 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream,
// Runs opt() on Mod and then codegen(). If opt() returns false, codegen is
// skipped. Optimization remarks are finalized on both paths and the result of
// that finalization is what the lambda returns.
auto OptimizeAndCodegen =
[&](Module &Mod, TargetMachine *TM,
std::unique_ptr<ToolOutputFile> DiagnosticOutputFile) {
// Perform optimization and code generation for ThinLTO.
if (!opt(Conf, TM, Task, Mod, /*IsThinLTO=*/true,
/*ExportSummary=*/nullptr, /*ImportSummary=*/&CombinedIndex,
CmdArgs))
return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));

// In two-round mode, save the optimized module before the first codegen
// round so that the second round can reload it.
// The second codegen round runs only `codegen()` and never `opt()`: it
// does not reach this point because it bails out earlier with
// CodeGenOnly, which `OptimizedBitcodeThinBackend` sets.
if (CodeGenDataThinLTOTwoRounds)
cgdata::saveModuleForTwoRounds(Mod, Task);

codegen(Conf, TM, AddStream, Task, Mod, CombinedIndex);
return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
};
Expand Down
Loading

0 comments on commit a566ab0

Please sign in to comment.