Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CGData][llvm-cgdata] Support for stable function map #112664

Merged
merged 2 commits into from
Nov 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions lld/test/MachO/cgdata-generate.s
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@

# RUN: rm -rf %t; split-file %s %t

# Synthesize raw cgdata without the header (24 byte) from the indexed cgdata.
# Synthesize raw cgdata without the header (32 byte) from the indexed cgdata.
# RUN: llvm-cgdata --convert --format binary %t/raw-1.cgtext -o %t/raw-1.cgdata
# RUN: od -t x1 -j 24 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ][ ]*/ /g; s/^[ ]*//; s/[ ]*$//; s/[ ]/,0x/g; s/^/0x/' > %t/raw-1-bytes.txt
# RUN: od -t x1 -j 32 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ][ ]*/ /g; s/^[ ]*//; s/[ ]*$//; s/[ ]/,0x/g; s/^/0x/' > %t/raw-1-bytes.txt
# RUN: sed "s/<RAW_BYTES>/$(cat %t/raw-1-bytes.txt)/g" %t/merge-template.s > %t/merge-1.s
# RUN: llvm-cgdata --convert --format binary %t/raw-2.cgtext -o %t/raw-2.cgdata
# RUN: od -t x1 -j 24 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ][ ]*/ /g; s/^[ ]*//; s/[ ]*$//; s/[ ]/,0x/g; s/^/0x/' > %t/raw-2-bytes.txt
# RUN: od -t x1 -j 32 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ][ ]*/ /g; s/^[ ]*//; s/[ ]*$//; s/[ ]/,0x/g; s/^/0x/' > %t/raw-2-bytes.txt
# RUN: sed "s/<RAW_BYTES>/$(cat %t/raw-2-bytes.txt)/g" %t/merge-template.s > %t/merge-2.s

# RUN: llvm-mc -filetype obj -triple arm64-apple-darwin %t/merge-1.s -o %t/merge-1.o
Expand Down
16 changes: 7 additions & 9 deletions llvm/docs/CommandGuide/llvm-cgdata.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,13 @@ SYNOPSIS
DESCRIPTION
-----------

The :program:llvm-cgdata utility parses raw codegen data embedded
in compiled binary files and merges them into a single .cgdata file.
It can also inspect and manipulate .cgdata files.
Currently, the tool supports saving and restoring outlined hash trees,
enabling global function outlining across modules, allowing for more
efficient function outlining in subsequent compilations.
The design is extensible, allowing for the incorporation of additional
codegen summaries and optimization techniques, such as global function
merging, in the future.
The :program:llvm-cgdata utility parses raw codegen data embedded in compiled
binary files and merges them into a single .cgdata file. It can also inspect
and manipulate .cgdata files. Currently, the tool supports saving and restoring
outlined hash trees and stable function maps, allowing for more efficient
function outlining and function merging across modules in subsequent
compilations. The design is extensible, allowing for the incorporation of
additional codegen summaries and optimization techniques.

COMMANDS
--------
Expand Down
24 changes: 23 additions & 1 deletion llvm/include/llvm/CGData/CodeGenData.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/CGData/OutlinedHashTree.h"
#include "llvm/CGData/OutlinedHashTreeRecord.h"
#include "llvm/CGData/StableFunctionMapRecord.h"
#include "llvm/IR/Module.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Caching.h"
Expand All @@ -41,7 +42,9 @@ enum class CGDataKind {
Unknown = 0x0,
// A function outlining info.
FunctionOutlinedHashTree = 0x1,
LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/FunctionOutlinedHashTree)
// A function merging info.
StableFunctionMergingMap = 0x2,
LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/StableFunctionMergingMap)
};

const std::error_category &cgdata_category();
Expand Down Expand Up @@ -108,6 +111,8 @@ enum CGDataMode {
class CodeGenData {
/// Global outlined hash tree that has outlined hash sequences across modules.
std::unique_ptr<OutlinedHashTree> PublishedHashTree;
/// Global stable function map that has stable function info across modules.
std::unique_ptr<StableFunctionMap> PublishedStableFunctionMap;

/// This flag is set when -fcodegen-data-generate is passed.
/// Or, it can be mutated with -fcodegen-data-thinlto-two-rounds.
Expand All @@ -131,6 +136,9 @@ class CodeGenData {
bool hasOutlinedHashTree() {
  // Nothing has been published yet, so there is no tree to report.
  if (!PublishedHashTree)
    return false;
  // A published tree only counts when it is not empty.
  return !PublishedHashTree->empty();
}
/// Returns true if a stable function map has been published and it is not
/// empty.
bool hasStableFunctionMap() {
  // Nothing has been published yet, so there is no map to report.
  if (!PublishedStableFunctionMap)
    return false;
  // A published map only counts when it is not empty.
  return !PublishedStableFunctionMap->empty();
}

/// Returns the outlined hash tree. This can be globally used in a read-only
/// manner.
Expand All @@ -147,6 +155,12 @@ class CodeGenData {
// Ensure we disable emitCGData as we do not want to read and write both.
EmitCGData = false;
}
/// Publish the stable function map by taking ownership of \p FunctionMap and
/// storing it in PublishedStableFunctionMap. Publishing also clears
/// EmitCGData.
void
publishStableFunctionMap(std::unique_ptr<StableFunctionMap> FunctionMap) {
PublishedStableFunctionMap = std::move(FunctionMap);
// Ensure we disable emitCGData as we do not want to read and write both.
EmitCGData = false;
}
};

namespace cgdata {
Expand All @@ -166,6 +180,11 @@ publishOutlinedHashTree(std::unique_ptr<OutlinedHashTree> HashTree) {
CodeGenData::getInstance().publishOutlinedHashTree(std::move(HashTree));
}

/// Convenience wrapper that forwards \p FunctionMap (ownership is moved) to
/// publishStableFunctionMap() on the object returned by
/// CodeGenData::getInstance().
inline void
publishStableFunctionMap(std::unique_ptr<StableFunctionMap> FunctionMap) {
CodeGenData::getInstance().publishStableFunctionMap(std::move(FunctionMap));
}

struct StreamCacheData {
/// Backing buffer for serialized data stream.
SmallVector<SmallString<0>> Outputs;
Expand Down Expand Up @@ -249,6 +268,8 @@ enum CGDataVersion {
// Version 1 is the first version. This version supports the outlined
// hash tree.
Version1 = 1,
// Version 2 supports the stable function merging map.
Version2 = 2,
CurrentVersion = CG_DATA_INDEX_VERSION
};
const uint64_t Version = CGDataVersion::CurrentVersion;
Expand All @@ -258,6 +279,7 @@ struct Header {
uint32_t Version;
uint32_t DataKind;
uint64_t OutlinedHashTreeOffset;
uint64_t StableFunctionMapOffset;

// New fields should only be added at the end to ensure that the size
// computation is correct. The methods below need to be updated to ensure that
Expand Down
12 changes: 9 additions & 3 deletions llvm/include/llvm/CGData/CodeGenData.inc
Original file line number Diff line number Diff line change
Expand Up @@ -20,27 +20,33 @@
#define CG_DATA_DEFINED
CG_DATA_SECT_ENTRY(CG_outline, CG_DATA_QUOTE(CG_DATA_OUTLINE_COMMON),
CG_DATA_OUTLINE_COFF, "__DATA,")
CG_DATA_SECT_ENTRY(CG_merge, CG_DATA_QUOTE(CG_DATA_MERGE_COMMON),
CG_DATA_MERGE_COFF, "__DATA,")

#undef CG_DATA_SECT_ENTRY
#endif

/* section name strings common to all targets other
than WIN32 */
#define CG_DATA_OUTLINE_COMMON __llvm_outline
#define CG_DATA_MERGE_COMMON __llvm_merge
/* Since cg data sections are not allocated, we don't need to
* access them at runtime.
*/
#define CG_DATA_OUTLINE_COFF ".loutline"
#define CG_DATA_MERGE_COFF ".lmerge"

#ifdef _WIN32
/* Runtime section names and name strings. */
#define CG_DATA_SECT_NAME CG_DATA_OUTLINE_COFF
#define CG_DATA_OUTLINE_SECT_NAME CG_DATA_OUTLINE_COFF
#define CG_DATA_MERGE_SECT_NAME CG_DATA_MERGE_COFF

#else
/* Runtime section names and name strings. */
#define CG_DATA_SECT_NAME CG_DATA_QUOTE(CG_DATA_OUTLINE_COMMON)
#define CG_DATA_OUTLINE_SECT_NAME CG_DATA_QUOTE(CG_DATA_OUTLINE_COMMON)
#define CG_DATA_MERGE_SECT_NAME CG_DATA_QUOTE(CG_DATA_MERGE_COMMON)

#endif

/* Indexed codegen data format version (start from 1). */
#define CG_DATA_INDEX_VERSION 1
#define CG_DATA_INDEX_VERSION 2
29 changes: 26 additions & 3 deletions llvm/include/llvm/CGData/CodeGenDataReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

#include "llvm/CGData/CodeGenData.h"
#include "llvm/CGData/OutlinedHashTreeRecord.h"
#include "llvm/CGData/StableFunctionMapRecord.h"
#include "llvm/Support/LineIterator.h"
#include "llvm/Support/VirtualFileSystem.h"

Expand All @@ -36,10 +37,15 @@ class CodeGenDataReader {
virtual CGDataKind getDataKind() const = 0;
/// Return true if the data has an outlined hash tree.
virtual bool hasOutlinedHashTree() const = 0;
/// Return true if the data has a stable function map.
virtual bool hasStableFunctionMap() const = 0;
/// Return the outlined hash tree that is released from the reader.
std::unique_ptr<OutlinedHashTree> releaseOutlinedHashTree() {
return std::move(HashTreeRecord.HashTree);
}
/// Return the stable function map that is released from the reader.
/// Ownership of FunctionMapRecord.FunctionMap is moved out to the caller.
std::unique_ptr<StableFunctionMap> releaseStableFunctionMap() {
return std::move(FunctionMapRecord.FunctionMap);
}

/// Factory method to create an appropriately typed reader for the given
/// codegen data file path and file system.
Expand All @@ -56,15 +62,21 @@ class CodeGenDataReader {
/// is used by `llvm-cgdata --merge` or ThinLTO's two-codegen rounds.
/// Optionally, \p CombinedHash can be used to compute the combined hash of
/// the merged data.
static Error mergeFromObjectFile(const object::ObjectFile *Obj,
OutlinedHashTreeRecord &GlobalOutlineRecord,
stable_hash *CombinedHash = nullptr);
static Error
mergeFromObjectFile(const object::ObjectFile *Obj,
OutlinedHashTreeRecord &GlobalOutlineRecord,
StableFunctionMapRecord &GlobalFunctionMapRecord,
stable_hash *CombinedHash = nullptr);

protected:
/// The outlined hash tree that has been read. When it's released by
/// releaseOutlinedHashTree(), it's no longer valid.
OutlinedHashTreeRecord HashTreeRecord;

/// The stable function map that has been read. When it's released by
/// releaseStableFunctionMap(), it's no longer valid.
StableFunctionMapRecord FunctionMapRecord;

/// Set the current error and return same.
Error error(cgdata_error Err, const std::string &ErrMsg = "") {
LastError = Err;
Expand Down Expand Up @@ -115,6 +127,11 @@ class IndexedCodeGenDataReader : public CodeGenDataReader {
return Header.DataKind &
static_cast<uint32_t>(CGDataKind::FunctionOutlinedHashTree);
}
/// Report whether the data-kind bits stored in the header include the
/// stable function map kind.
bool hasStableFunctionMap() const override {
  const uint32_t MapBit =
      static_cast<uint32_t>(CGDataKind::StableFunctionMergingMap);
  return (Header.DataKind & MapBit) != 0;
}
};

/// This format is a simple text format that's suitable for test data.
Expand Down Expand Up @@ -150,6 +167,12 @@ class TextCodeGenDataReader : public CodeGenDataReader {
return static_cast<uint32_t>(DataKind) &
static_cast<uint32_t>(CGDataKind::FunctionOutlinedHashTree);
}
/// Report whether the data kind read by this reader includes the stable
/// function map kind. A true result does not mean that the data is still
/// available.
bool hasStableFunctionMap() const override {
  const uint32_t KindBits = static_cast<uint32_t>(DataKind);
  const uint32_t MapBit =
      static_cast<uint32_t>(CGDataKind::StableFunctionMergingMap);
  return (KindBits & MapBit) != 0;
}
};

} // end namespace llvm
Expand Down
17 changes: 16 additions & 1 deletion llvm/include/llvm/CGData/CodeGenDataWriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

#include "llvm/CGData/CodeGenData.h"
#include "llvm/CGData/OutlinedHashTreeRecord.h"
#include "llvm/CGData/StableFunctionMapRecord.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/Error.h"

Expand Down Expand Up @@ -57,16 +58,22 @@ class CodeGenDataWriter {
/// The outlined hash tree to be written.
OutlinedHashTreeRecord HashTreeRecord;

/// The stable function map to be written.
StableFunctionMapRecord FunctionMapRecord;

/// A bit mask describing the kind of the codegen data.
CGDataKind DataKind = CGDataKind::Unknown;

public:
CodeGenDataWriter() = default;
~CodeGenDataWriter() = default;

/// Add the outlined hash tree record. The input Record is released.
/// Add the outlined hash tree record. The input hash tree is released.
void addRecord(OutlinedHashTreeRecord &Record);

/// Add the stable function map record. The input function map is released.
void addRecord(StableFunctionMapRecord &Record);

/// Write the codegen data to \c OS
Error write(raw_fd_ostream &OS);

Expand All @@ -81,11 +88,19 @@ class CodeGenDataWriter {
return static_cast<uint32_t>(DataKind) &
static_cast<uint32_t>(CGDataKind::FunctionOutlinedHashTree);
}
/// Report whether the writer's data-kind bit mask includes the stable
/// function map kind.
bool hasStableFunctionMap() const {
  const uint32_t KindBits = static_cast<uint32_t>(DataKind);
  const uint32_t MapBit =
      static_cast<uint32_t>(CGDataKind::StableFunctionMergingMap);
  return (KindBits & MapBit) != 0;
}

private:
/// The offset of the outlined hash tree in the file.
uint64_t OutlinedHashTreeOffset;

/// The offset of the stable function map in the file.
uint64_t StableFunctionMapOffset;

/// Write the codegen data header to \c COS
Error writeHeader(CGDataOStream &COS);

Expand Down
30 changes: 18 additions & 12 deletions llvm/lib/CGData/CodeGenData.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/CGData/CodeGenDataReader.h"
#include "llvm/CGData/OutlinedHashTreeRecord.h"
#include "llvm/CGData/StableFunctionMapRecord.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Caching.h"
#include "llvm/Support/CommandLine.h"
Expand Down Expand Up @@ -163,6 +164,8 @@ CodeGenData &CodeGenData::getInstance() {
auto Reader = ReaderOrErr->get();
if (Reader->hasOutlinedHashTree())
Instance->publishOutlinedHashTree(Reader->releaseOutlinedHashTree());
if (Reader->hasStableFunctionMap())
Instance->publishStableFunctionMap(Reader->releaseStableFunctionMap());
}
});
return *Instance;
Expand All @@ -185,18 +188,14 @@ Expected<Header> Header::readFromBuffer(const unsigned char *Curr) {
return make_error<CGDataError>(cgdata_error::unsupported_version);
H.DataKind = endian::readNext<uint32_t, endianness::little, unaligned>(Curr);

switch (H.Version) {
// When a new field is added to the header add a case statement here to
// compute the size as offset of the new field + size of the new field. This
// relies on the field being added to the end of the list.
static_assert(IndexedCGData::CGDataVersion::CurrentVersion == Version1,
"Please update the size computation below if a new field has "
"been added to the header, if not add a case statement to "
"fall through to the latest version.");
case 1ull:
H.OutlinedHashTreeOffset =
static_assert(IndexedCGData::CGDataVersion::CurrentVersion == Version2,
"Please update the offset computation below if a new field has "
"been added to the header.");
H.OutlinedHashTreeOffset =
endian::readNext<uint64_t, endianness::little, unaligned>(Curr);
if (H.Version >= 2)
H.StableFunctionMapOffset =
endian::readNext<uint64_t, endianness::little, unaligned>(Curr);
}

return H;
}
Expand Down Expand Up @@ -257,6 +256,7 @@ std::unique_ptr<Module> loadModuleForTwoRounds(BitcodeModule &OrigModule,

Expected<stable_hash> mergeCodeGenData(ArrayRef<StringRef> ObjFiles) {
OutlinedHashTreeRecord GlobalOutlineRecord;
StableFunctionMapRecord GlobalStableFunctionMapRecord;
stable_hash CombinedHash = 0;
for (auto File : ObjFiles) {
if (File.empty())
Expand All @@ -270,12 +270,18 @@ Expected<stable_hash> mergeCodeGenData(ArrayRef<StringRef> ObjFiles) {

std::unique_ptr<object::ObjectFile> &Obj = BinOrErr.get();
if (auto E = CodeGenDataReader::mergeFromObjectFile(
Obj.get(), GlobalOutlineRecord, &CombinedHash))
Obj.get(), GlobalOutlineRecord, GlobalStableFunctionMapRecord,
&CombinedHash))
return E;
}

GlobalStableFunctionMapRecord.finalize();

if (!GlobalOutlineRecord.empty())
cgdata::publishOutlinedHashTree(std::move(GlobalOutlineRecord.HashTree));
if (!GlobalStableFunctionMapRecord.empty())
cgdata::publishStableFunctionMap(
std::move(GlobalStableFunctionMapRecord.FunctionMap));

return CombinedHash;
}
Expand Down
Loading
Loading