Skip to content

Commit

Permalink
[CGData][llvm-cgdata] Support for stable function map
Browse files Browse the repository at this point in the history
This introduces a new cgdata format for stable function maps.
The raw data is embedded in the __llvm_merge section during compile time.
This data can be read and merged into an indexed cgdata file using the llvm-cgdata tool. Consequently, the tool is now capable of handling outlined hash trees, stable function maps, or both, as they are orthogonal.
  • Loading branch information
kyulee-com committed Oct 17, 2024
1 parent 060a23e commit 09f1ec7
Show file tree
Hide file tree
Showing 21 changed files with 577 additions and 87 deletions.
6 changes: 3 additions & 3 deletions lld/test/MachO/cgdata-generate.s
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@

# RUN: rm -rf %t; split-file %s %t

# Synthesize raw cgdata without the header (24 byte) from the indexed cgdata.
# Synthesize raw cgdata without the header (32 byte) from the indexed cgdata.
# RUN: llvm-cgdata --convert --format binary %t/raw-1.cgtext -o %t/raw-1.cgdata
# RUN: od -t x1 -j 24 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ][ ]*/ /g; s/^[ ]*//; s/[ ]*$//; s/[ ]/,0x/g; s/^/0x/' > %t/raw-1-bytes.txt
# RUN: od -t x1 -j 32 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ][ ]*/ /g; s/^[ ]*//; s/[ ]*$//; s/[ ]/,0x/g; s/^/0x/' > %t/raw-1-bytes.txt
# RUN: sed "s/<RAW_BYTES>/$(cat %t/raw-1-bytes.txt)/g" %t/merge-template.s > %t/merge-1.s
# RUN: llvm-cgdata --convert --format binary %t/raw-2.cgtext -o %t/raw-2.cgdata
# RUN: od -t x1 -j 24 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ][ ]*/ /g; s/^[ ]*//; s/[ ]*$//; s/[ ]/,0x/g; s/^/0x/' > %t/raw-2-bytes.txt
# RUN: od -t x1 -j 32 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ][ ]*/ /g; s/^[ ]*//; s/[ ]*$//; s/[ ]/,0x/g; s/^/0x/' > %t/raw-2-bytes.txt
# RUN: sed "s/<RAW_BYTES>/$(cat %t/raw-2-bytes.txt)/g" %t/merge-template.s > %t/merge-2.s

# RUN: llvm-mc -filetype obj -triple arm64-apple-darwin %t/merge-1.s -o %t/merge-1.o
Expand Down
16 changes: 7 additions & 9 deletions llvm/docs/CommandGuide/llvm-cgdata.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,13 @@ SYNOPSIS
DESCRIPTION
-----------

The :program:`llvm-cgdata` utility parses raw codegen data embedded
in compiled binary files and merges them into a single .cgdata file.
It can also inspect and manipulate .cgdata files.
Currently, the tool supports saving and restoring outlined hash trees,
enabling global function outlining across modules, allowing for more
efficient function outlining in subsequent compilations.
The design is extensible, allowing for the incorporation of additional
codegen summaries and optimization techniques, such as global function
merging, in the future.
The :program:`llvm-cgdata` utility parses raw codegen data embedded in compiled
binary files and merges them into a single .cgdata file. It can also inspect
and manipulate .cgdata files. Currently, the tool supports saving and restoring
outlined hash trees and stable function maps, allowing for more efficient
function outlining and function merging across modules in subsequent
compilations. The design is extensible, allowing for the incorporation of
additional codegen summaries and optimization techniques.

COMMANDS
--------
Expand Down
24 changes: 23 additions & 1 deletion llvm/include/llvm/CGData/CodeGenData.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/CGData/OutlinedHashTree.h"
#include "llvm/CGData/OutlinedHashTreeRecord.h"
#include "llvm/CGData/StableFunctionMapRecord.h"
#include "llvm/IR/Module.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Caching.h"
Expand All @@ -41,7 +42,9 @@ enum class CGDataKind {
Unknown = 0x0,
// A function outlining info.
FunctionOutlinedHashTree = 0x1,
LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/FunctionOutlinedHashTree)
// A function merging info.
StableFunctionMergingMap = 0x2,
LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/StableFunctionMergingMap)
};

const std::error_category &cgdata_category();
Expand Down Expand Up @@ -108,6 +111,8 @@ enum CGDataMode {
class CodeGenData {
/// Global outlined hash tree that has outlined hash sequences across modules.
std::unique_ptr<OutlinedHashTree> PublishedHashTree;
/// Global stable function map that has stable function info across modules.
std::unique_ptr<StableFunctionMap> PublishedStableFunctionMap;

/// This flag is set when -fcodegen-data-generate is passed.
/// Or, it can be mutated with -fcodegen-data-thinlto-two-rounds.
Expand All @@ -131,6 +136,9 @@ class CodeGenData {
/// Returns true only when a hash tree has been published and that tree
/// reports itself as non-empty.
bool hasOutlinedHashTree() {
  if (!PublishedHashTree)
    return false;
  return !PublishedHashTree->empty();
}
/// Returns true only when a stable function map has been published and that
/// map reports itself as non-empty.
bool hasStableFunctionMap() {
  if (!PublishedStableFunctionMap)
    return false;
  return !PublishedStableFunctionMap->empty();
}

/// Returns the outlined hash tree. This can be globally used in a read-only
/// manner.
Expand All @@ -147,6 +155,12 @@ class CodeGenData {
// Ensure we disable emitCGData as we do not want to read and write both.
EmitCGData = false;
}
/// Takes ownership of \p FunctionMap and stores it as the published stable
/// function map.
void
publishStableFunctionMap(std::unique_ptr<StableFunctionMap> FunctionMap) {
  // Reading and writing codegen data in the same run is not wanted, so
  // publishing a map switches emission off.
  EmitCGData = false;
  PublishedStableFunctionMap = std::move(FunctionMap);
}
};

namespace cgdata {
Expand All @@ -166,6 +180,11 @@ publishOutlinedHashTree(std::unique_ptr<OutlinedHashTree> HashTree) {
CodeGenData::getInstance().publishOutlinedHashTree(std::move(HashTree));
}

inline void
publishStableFunctionMap(std::unique_ptr<StableFunctionMap> FunctionMap) {
CodeGenData::getInstance().publishStableFunctionMap(std::move(FunctionMap));
}

struct StreamCacheData {
/// Backing buffer for serialized data stream.
SmallVector<SmallString<0>> Outputs;
Expand Down Expand Up @@ -249,6 +268,8 @@ enum CGDataVersion {
// Version 1 is the first version. This version supports the outlined
// hash tree.
Version1 = 1,
// Version 2 supports the stable function merging map.
Version2 = 2,
CurrentVersion = CG_DATA_INDEX_VERSION
};
const uint64_t Version = CGDataVersion::CurrentVersion;
Expand All @@ -258,6 +279,7 @@ struct Header {
uint32_t Version;
uint32_t DataKind;
uint64_t OutlinedHashTreeOffset;
uint64_t StableFunctionMapOffset;

// New fields should only be added at the end to ensure that the size
// computation is correct. The methods below need to be updated to ensure that
Expand Down
12 changes: 9 additions & 3 deletions llvm/include/llvm/CGData/CodeGenData.inc
Original file line number Diff line number Diff line change
Expand Up @@ -20,27 +20,33 @@
#define CG_DATA_DEFINED
CG_DATA_SECT_ENTRY(CG_outline, CG_DATA_QUOTE(CG_DATA_OUTLINE_COMMON),
CG_DATA_OUTLINE_COFF, "__DATA,")
CG_DATA_SECT_ENTRY(CG_merge, CG_DATA_QUOTE(CG_DATA_MERGE_COMMON),
CG_DATA_MERGE_COFF, "__DATA,")

#undef CG_DATA_SECT_ENTRY
#endif

/* section name strings common to all targets other
than WIN32 */
#define CG_DATA_OUTLINE_COMMON __llvm_outline
#define CG_DATA_MERGE_COMMON __llvm_merge
/* Since cg data sections are not allocated, we don't need to
* access them at runtime.
*/
#define CG_DATA_OUTLINE_COFF ".loutline"
#define CG_DATA_MERGE_COFF ".lmerge"

#ifdef _WIN32
/* Runtime section names and name strings. */
#define CG_DATA_SECT_NAME CG_DATA_OUTLINE_COFF
#define CG_DATA_OUTLINE_SECT_NAME CG_DATA_OUTLINE_COFF
#define CG_DATA_MERGE_SECT_NAME CG_DATA_MERGE_COFF

#else
/* Runtime section names and name strings. */
#define CG_DATA_SECT_NAME CG_DATA_QUOTE(CG_DATA_OUTLINE_COMMON)
#define CG_DATA_OUTLINE_SECT_NAME CG_DATA_QUOTE(CG_DATA_OUTLINE_COMMON)
#define CG_DATA_MERGE_SECT_NAME CG_DATA_QUOTE(CG_DATA_MERGE_COMMON)

#endif

/* Indexed codegen data format version (start from 1). */
#define CG_DATA_INDEX_VERSION 1
#define CG_DATA_INDEX_VERSION 2
29 changes: 26 additions & 3 deletions llvm/include/llvm/CGData/CodeGenDataReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

#include "llvm/CGData/CodeGenData.h"
#include "llvm/CGData/OutlinedHashTreeRecord.h"
#include "llvm/CGData/StableFunctionMapRecord.h"
#include "llvm/Support/LineIterator.h"
#include "llvm/Support/VirtualFileSystem.h"

Expand All @@ -36,10 +37,15 @@ class CodeGenDataReader {
virtual CGDataKind getDataKind() const = 0;
/// Return true if the data has an outlined hash tree.
virtual bool hasOutlinedHashTree() const = 0;
/// Return true if the data has a stable function map.
virtual bool hasStableFunctionMap() const = 0;
/// Return the outlined hash tree that is released from the reader.
std::unique_ptr<OutlinedHashTree> releaseOutlinedHashTree() {
return std::move(HashTreeRecord.HashTree);
}
std::unique_ptr<StableFunctionMap> releaseStableFunctionMap() {
return std::move(FunctionMapRecord.FunctionMap);
}

/// Factory method to create an appropriately typed reader for the given
/// codegen data file path and file system.
Expand All @@ -56,15 +62,21 @@ class CodeGenDataReader {
/// is used by `llvm-cgdata --merge` or ThinLTO's two-codegen rounds.
/// Optionally, \p CombinedHash can be used to compute the combined hash of
/// the merged data.
static Error mergeFromObjectFile(const object::ObjectFile *Obj,
OutlinedHashTreeRecord &GlobalOutlineRecord,
stable_hash *CombinedHash = nullptr);
static Error
mergeFromObjectFile(const object::ObjectFile *Obj,
OutlinedHashTreeRecord &GlobalOutlineRecord,
StableFunctionMapRecord &GlobalFunctionMapRecord,
stable_hash *CombinedHash = nullptr);

protected:
/// The outlined hash tree that has been read. When it's released by
/// releaseOutlinedHashTree(), it's no longer valid.
OutlinedHashTreeRecord HashTreeRecord;

/// The stable function map that has been read. When it's released by
/// releaseStableFunctionMap(), it's no longer valid.
StableFunctionMapRecord FunctionMapRecord;

/// Set the current error and return same.
Error error(cgdata_error Err, const std::string &ErrMsg = "") {
LastError = Err;
Expand Down Expand Up @@ -115,6 +127,11 @@ class IndexedCodeGenDataReader : public CodeGenDataReader {
return Header.DataKind &
static_cast<uint32_t>(CGDataKind::FunctionOutlinedHashTree);
}
/// Report whether the StableFunctionMergingMap bit is set in the header's
/// DataKind field.
bool hasStableFunctionMap() const override {
  const uint32_t MapBit =
      static_cast<uint32_t>(CGDataKind::StableFunctionMergingMap);
  return (Header.DataKind & MapBit) != 0;
}
};

/// This format is a simple text format that's suitable for test data.
Expand Down Expand Up @@ -150,6 +167,12 @@ class TextCodeGenDataReader : public CodeGenDataReader {
return static_cast<uint32_t>(DataKind) &
static_cast<uint32_t>(CGDataKind::FunctionOutlinedHashTree);
}
/// Report whether the StableFunctionMergingMap bit is set in this reader's
/// DataKind. A set bit does not mean that the data is still available.
bool hasStableFunctionMap() const override {
  const uint32_t KindBits = static_cast<uint32_t>(DataKind);
  const uint32_t MapBit =
      static_cast<uint32_t>(CGDataKind::StableFunctionMergingMap);
  return (KindBits & MapBit) != 0;
}
};

} // end namespace llvm
Expand Down
17 changes: 16 additions & 1 deletion llvm/include/llvm/CGData/CodeGenDataWriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

#include "llvm/CGData/CodeGenData.h"
#include "llvm/CGData/OutlinedHashTreeRecord.h"
#include "llvm/CGData/StableFunctionMapRecord.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/Error.h"

Expand Down Expand Up @@ -57,16 +58,22 @@ class CodeGenDataWriter {
/// The outlined hash tree to be written.
OutlinedHashTreeRecord HashTreeRecord;

/// The stable function map to be written.
StableFunctionMapRecord FunctionMapRecord;

/// A bit mask describing the kind of the codegen data.
CGDataKind DataKind = CGDataKind::Unknown;

public:
CodeGenDataWriter() = default;
~CodeGenDataWriter() = default;

/// Add the outlined hash tree record. The input Record is released.
/// Add the outlined hash tree record. The input hash tree is released.
void addRecord(OutlinedHashTreeRecord &Record);

/// Add the stable function map record. The input function map is released.
void addRecord(StableFunctionMapRecord &Record);

/// Write the codegen data to \c OS
Error write(raw_fd_ostream &OS);

Expand All @@ -81,11 +88,19 @@ class CodeGenDataWriter {
return static_cast<uint32_t>(DataKind) &
static_cast<uint32_t>(CGDataKind::FunctionOutlinedHashTree);
}
/// Report whether the StableFunctionMergingMap bit is set in the writer's
/// DataKind mask.
bool hasStableFunctionMap() const {
  const uint32_t KindBits = static_cast<uint32_t>(DataKind);
  const uint32_t MapBit =
      static_cast<uint32_t>(CGDataKind::StableFunctionMergingMap);
  return (KindBits & MapBit) != 0;
}

private:
/// The offset of the outlined hash tree in the file.
uint64_t OutlinedHashTreeOffset;

/// The offset of the stable function map in the file.
uint64_t StableFunctionMapOffset;

/// Write the codegen data header to \c COS
Error writeHeader(CGDataOStream &COS);

Expand Down
30 changes: 18 additions & 12 deletions llvm/lib/CGData/CodeGenData.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/CGData/CodeGenDataReader.h"
#include "llvm/CGData/OutlinedHashTreeRecord.h"
#include "llvm/CGData/StableFunctionMapRecord.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Caching.h"
#include "llvm/Support/CommandLine.h"
Expand Down Expand Up @@ -163,6 +164,8 @@ CodeGenData &CodeGenData::getInstance() {
auto Reader = ReaderOrErr->get();
if (Reader->hasOutlinedHashTree())
Instance->publishOutlinedHashTree(Reader->releaseOutlinedHashTree());
if (Reader->hasStableFunctionMap())
Instance->publishStableFunctionMap(Reader->releaseStableFunctionMap());
}
});
return *(Instance.get());
Expand All @@ -185,18 +188,14 @@ Expected<Header> Header::readFromBuffer(const unsigned char *Curr) {
return make_error<CGDataError>(cgdata_error::unsupported_version);
H.DataKind = endian::readNext<uint32_t, endianness::little, unaligned>(Curr);

switch (H.Version) {
// When a new field is added to the header add a case statement here to
// compute the size as offset of the new field + size of the new field. This
// relies on the field being added to the end of the list.
static_assert(IndexedCGData::CGDataVersion::CurrentVersion == Version1,
"Please update the size computation below if a new field has "
"been added to the header, if not add a case statement to "
"fall through to the latest version.");
case 1ull:
H.OutlinedHashTreeOffset =
static_assert(IndexedCGData::CGDataVersion::CurrentVersion == Version2,
"Please update the offset computation below if a new field has "
"been added to the header.");
H.OutlinedHashTreeOffset =
endian::readNext<uint64_t, endianness::little, unaligned>(Curr);
if (H.Version >= 2)
H.StableFunctionMapOffset =
endian::readNext<uint64_t, endianness::little, unaligned>(Curr);
}

return H;
}
Expand Down Expand Up @@ -257,6 +256,7 @@ std::unique_ptr<Module> loadModuleForTwoRounds(BitcodeModule &OrigModule,

Expected<stable_hash> mergeCodeGenData(ArrayRef<StringRef> ObjFiles) {
OutlinedHashTreeRecord GlobalOutlineRecord;
StableFunctionMapRecord GlobalStableFunctionMapRecord;
stable_hash CombinedHash = 0;
for (auto File : ObjFiles) {
if (File.empty())
Expand All @@ -270,12 +270,18 @@ Expected<stable_hash> mergeCodeGenData(ArrayRef<StringRef> ObjFiles) {

std::unique_ptr<object::ObjectFile> &Obj = BinOrErr.get();
if (auto E = CodeGenDataReader::mergeFromObjectFile(
Obj.get(), GlobalOutlineRecord, &CombinedHash))
Obj.get(), GlobalOutlineRecord, GlobalStableFunctionMapRecord,
&CombinedHash))
return E;
}

GlobalStableFunctionMapRecord.finalize();

if (!GlobalOutlineRecord.empty())
cgdata::publishOutlinedHashTree(std::move(GlobalOutlineRecord.HashTree));
if (!GlobalStableFunctionMapRecord.empty())
cgdata::publishStableFunctionMap(
std::move(GlobalStableFunctionMapRecord.FunctionMap));

return CombinedHash;
}
Expand Down
Loading

0 comments on commit 09f1ec7

Please sign in to comment.