Skip to content

Commit 7ec26b2

Browse files
authored
[CGData] Stable Function Map (llvm#112662)
These define the main data structures to represent stable functions and group similar functions in a function map. Serialization is supported in a binary or yaml form. Depends on llvm#112638. This is a patch for https://discourse.llvm.org/t/rfc-global-function-merging/82608.
1 parent df9769e commit 7ec26b2

8 files changed

+837
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
//===- StableFunctionMap.h -------------------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===---------------------------------------------------------------------===//
8+
//
9+
// This defines the StableFunctionMap class, to track similar functions.
10+
// It provides a mechanism to map stable hashes of functions to their
11+
// corresponding metadata. It includes structures for storing function details
12+
// and methods for managing and querying these mappings.
13+
//
14+
//===---------------------------------------------------------------------===//
15+
16+
#ifndef LLVM_CGDATA_STABLEFUNCTIONMAP_H
17+
#define LLVM_CGDATA_STABLEFUNCTIONMAP_H
18+
19+
#include "llvm/ADT/DenseMap.h"
20+
#include "llvm/ADT/StringMap.h"
21+
#include "llvm/IR/StructuralHash.h"
22+
23+
namespace llvm {
24+
25+
/// The location of a skipped operand (an IndexPair) together with the stable
/// hash of that operand.
using IndexPairHash = std::pair<IndexPair, stable_hash>;
26+
/// A sequence of IndexPairHash entries; this is the form in which
/// StableFunction stores its skipped operands.
using IndexOperandHashVecType = SmallVector<IndexPairHash>;
27+
28+
/// A stable function is a function with a stable hash while tracking the
29+
/// locations of ignored operands and their hashes.
30+
struct StableFunction {
31+
/// The combined stable hash of the function.
32+
stable_hash Hash;
33+
/// The name of the function.
34+
std::string FunctionName;
35+
/// The name of the module the function is in.
36+
std::string ModuleName;
37+
/// The number of instructions.
38+
unsigned InstCount;
39+
/// A vector of pairs of IndexPair and operand hash which was skipped.
40+
IndexOperandHashVecType IndexOperandHashes;
41+
42+
StableFunction(stable_hash Hash, const std::string FunctionName,
43+
const std::string ModuleName, unsigned InstCount,
44+
IndexOperandHashVecType &&IndexOperandHashes)
45+
: Hash(Hash), FunctionName(FunctionName), ModuleName(ModuleName),
46+
InstCount(InstCount),
47+
IndexOperandHashes(std::move(IndexOperandHashes)) {}
48+
StableFunction() = default;
49+
};
50+
51+
struct StableFunctionMap {
52+
/// An efficient form of StableFunction for fast look-up
53+
struct StableFunctionEntry {
54+
/// The combined stable hash of the function.
55+
stable_hash Hash;
56+
/// Id of the function name.
57+
unsigned FunctionNameId;
58+
/// Id of the module name.
59+
unsigned ModuleNameId;
60+
/// The number of instructions.
61+
unsigned InstCount;
62+
/// A map from an IndexPair to a stable_hash which was skipped.
63+
std::unique_ptr<IndexOperandHashMapType> IndexOperandHashMap;
64+
65+
StableFunctionEntry(
66+
stable_hash Hash, unsigned FunctionNameId, unsigned ModuleNameId,
67+
unsigned InstCount,
68+
std::unique_ptr<IndexOperandHashMapType> IndexOperandHashMap)
69+
: Hash(Hash), FunctionNameId(FunctionNameId),
70+
ModuleNameId(ModuleNameId), InstCount(InstCount),
71+
IndexOperandHashMap(std::move(IndexOperandHashMap)) {}
72+
};
73+
74+
using HashFuncsMapType =
75+
DenseMap<stable_hash, SmallVector<std::unique_ptr<StableFunctionEntry>>>;
76+
77+
/// Get the HashToFuncs map for serialization.
78+
const HashFuncsMapType &getFunctionMap() const { return HashToFuncs; }
79+
80+
/// Get the NameToId vector for serialization.
81+
const SmallVector<std::string> getNames() const { return IdToName; }
82+
83+
/// Get an existing ID associated with the given name or create a new ID if it
84+
/// doesn't exist.
85+
unsigned getIdOrCreateForName(StringRef Name);
86+
87+
/// Get the name associated with a given ID
88+
std::optional<std::string> getNameForId(unsigned Id) const;
89+
90+
/// Insert a `StableFunction` object into the function map. This method
91+
/// handles the uniquing of string names and create a `StableFunctionEntry`
92+
/// for insertion.
93+
void insert(const StableFunction &Func);
94+
95+
/// Merge a \p OtherMap into this function map.
96+
void merge(const StableFunctionMap &OtherMap);
97+
98+
/// \returns true if there is no stable function entry.
99+
bool empty() const { return size() == 0; }
100+
101+
enum SizeType {
102+
UniqueHashCount, // The number of unique hashes in HashToFuncs.
103+
TotalFunctionCount, // The number of total functions in HashToFuncs.
104+
MergeableFunctionCount, // The number of functions that can be merged based
105+
// on their hash.
106+
};
107+
108+
/// \returns the size of StableFunctionMap.
109+
/// \p Type is the type of size to return.
110+
size_t size(SizeType Type = UniqueHashCount) const;
111+
112+
/// Finalize the stable function map by trimming content.
113+
void finalize();
114+
115+
private:
116+
/// Insert a `StableFunctionEntry` into the function map directly. This
117+
/// method assumes that string names have already been uniqued and the
118+
/// `StableFunctionEntry` is ready for insertion.
119+
void insert(std::unique_ptr<StableFunctionEntry> FuncEntry) {
120+
assert(!Finalized && "Cannot insert after finalization");
121+
HashToFuncs[FuncEntry->Hash].emplace_back(std::move(FuncEntry));
122+
}
123+
124+
/// A map from a stable_hash to a vector of functions with that hash.
125+
HashFuncsMapType HashToFuncs;
126+
/// A vector of strings to hold names.
127+
SmallVector<std::string> IdToName;
128+
/// A map from StringRef (name) to an ID.
129+
StringMap<unsigned> NameToId;
130+
/// True if the function map is finalized with minimal content.
131+
bool Finalized = false;
132+
133+
friend struct StableFunctionMapRecord;
134+
};
135+
136+
} // namespace llvm
137+
138+
#endif
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
//===- StableFunctionMapRecord.h -------------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===---------------------------------------------------------------------===//
8+
//
9+
// This defines the StableFunctionMapRecord structure, which provides
10+
// functionality for managing and serializing a StableFunctionMap. It includes
11+
// methods for serialization to and from raw and YAML streams, as well as
12+
// utilities for merging and finalizing function maps.
13+
//
14+
//===---------------------------------------------------------------------===//
15+
16+
#ifndef LLVM_CGDATA_STABLEFUNCTIONMAPRECORD_H
17+
#define LLVM_CGDATA_STABLEFUNCTIONMAPRECORD_H
18+
19+
#include "llvm/CGData/StableFunctionMap.h"
20+
#include "llvm/ObjectYAML/YAML.h"
21+
#include "llvm/Support/raw_ostream.h"
22+
23+
namespace llvm {
24+
25+
struct StableFunctionMapRecord {
26+
std::unique_ptr<StableFunctionMap> FunctionMap;
27+
28+
StableFunctionMapRecord() {
29+
FunctionMap = std::make_unique<StableFunctionMap>();
30+
}
31+
32+
StableFunctionMapRecord(std::unique_ptr<StableFunctionMap> FunctionMap)
33+
: FunctionMap(std::move(FunctionMap)) {}
34+
35+
/// A static helper function to serialize the stable function map without
36+
/// owning the stable function map.
37+
static void serialize(raw_ostream &OS, const StableFunctionMap *FunctionMap);
38+
39+
/// Serialize the stable function map to a raw_ostream.
40+
void serialize(raw_ostream &OS) const;
41+
42+
/// Deserialize the stable function map from a raw_ostream.
43+
void deserialize(const unsigned char *&Ptr);
44+
45+
/// Serialize the stable function map to a YAML stream.
46+
void serializeYAML(yaml::Output &YOS) const;
47+
48+
/// Deserialize the stable function map from a YAML stream.
49+
void deserializeYAML(yaml::Input &YIS);
50+
51+
/// Finalize the stable function map by trimming content.
52+
void finalize() { FunctionMap->finalize(); }
53+
54+
/// Merge the stable function map into this one.
55+
void merge(const StableFunctionMapRecord &Other) {
56+
FunctionMap->merge(*Other.FunctionMap);
57+
}
58+
59+
/// \returns true if the stable function map is empty.
60+
bool empty() const { return FunctionMap->empty(); }
61+
62+
/// Print the stable function map in a YAML format.
63+
void print(raw_ostream &OS = llvm::errs()) const {
64+
yaml::Output YOS(OS);
65+
serializeYAML(YOS);
66+
}
67+
};
68+
69+
} // namespace llvm
70+
71+
#endif

llvm/lib/CGData/CMakeLists.txt

+2
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ add_llvm_component_library(LLVMCGData
44
CodeGenDataWriter.cpp
55
OutlinedHashTree.cpp
66
OutlinedHashTreeRecord.cpp
7+
StableFunctionMap.cpp
8+
StableFunctionMapRecord.cpp
79

810
ADDITIONAL_HEADER_DIRS
911
${LLVM_MAIN_INCLUDE_DIR}/llvm/CGData

llvm/lib/CGData/StableFunctionMap.cpp

+167
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
//===-- StableFunctionMap.cpp ---------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This implements the functionality for the StableFunctionMap class, which
10+
// manages the mapping of stable function hashes to their metadata. It includes
11+
// methods for inserting, merging, and finalizing function entries, as well as
12+
// utilities for handling function names and IDs.
13+
//
14+
//===----------------------------------------------------------------------===//
15+
16+
#include "llvm/CGData/StableFunctionMap.h"
17+
18+
#define DEBUG_TYPE "stable-function-map"
19+
20+
using namespace llvm;
21+
22+
/// Return the ID already associated with \p Name, or assign the next free ID
/// to it if the name has not been seen before.
///
/// IDs are indices into IdToName, so a newly created ID always equals the
/// number of names registered so far.
unsigned StableFunctionMap::getIdOrCreateForName(StringRef Name) {
  // One try_emplace performs the lookup and, on a miss, the insertion, so the
  // name is hashed only once. On a hit the proposed ID is simply discarded.
  auto [It, Inserted] =
      NameToId.try_emplace(Name, static_cast<unsigned>(IdToName.size()));
  if (Inserted) {
    IdToName.emplace_back(Name.str());
    // Both tables must grow in lock-step; otherwise IDs would not be unique.
    assert(IdToName.size() == NameToId.size() && "ID collision");
  }
  return It->second;
}
32+
33+
std::optional<std::string> StableFunctionMap::getNameForId(unsigned Id) const {
34+
if (Id >= IdToName.size())
35+
return std::nullopt;
36+
return IdToName[Id];
37+
}
38+
39+
void StableFunctionMap::insert(const StableFunction &Func) {
40+
assert(!Finalized && "Cannot insert after finalization");
41+
auto FuncNameId = getIdOrCreateForName(Func.FunctionName);
42+
auto ModuleNameId = getIdOrCreateForName(Func.ModuleName);
43+
auto IndexOperandHashMap = std::make_unique<IndexOperandHashMapType>();
44+
for (auto &[Index, Hash] : Func.IndexOperandHashes)
45+
(*IndexOperandHashMap)[Index] = Hash;
46+
auto FuncEntry = std::make_unique<StableFunctionEntry>(
47+
Func.Hash, FuncNameId, ModuleNameId, Func.InstCount,
48+
std::move(IndexOperandHashMap));
49+
insert(std::move(FuncEntry));
50+
}
51+
52+
void StableFunctionMap::merge(const StableFunctionMap &OtherMap) {
53+
assert(!Finalized && "Cannot merge after finalization");
54+
for (auto &[Hash, Funcs] : OtherMap.HashToFuncs) {
55+
auto &ThisFuncs = HashToFuncs[Hash];
56+
for (auto &Func : Funcs) {
57+
auto FuncNameId =
58+
getIdOrCreateForName(*OtherMap.getNameForId(Func->FunctionNameId));
59+
auto ModuleNameId =
60+
getIdOrCreateForName(*OtherMap.getNameForId(Func->ModuleNameId));
61+
auto ClonedIndexOperandHashMap =
62+
std::make_unique<IndexOperandHashMapType>(*Func->IndexOperandHashMap);
63+
ThisFuncs.emplace_back(std::make_unique<StableFunctionEntry>(
64+
Func->Hash, FuncNameId, ModuleNameId, Func->InstCount,
65+
std::move(ClonedIndexOperandHashMap)));
66+
}
67+
}
68+
}
69+
70+
/// Report one of three size measures of the map, selected by \p Type:
/// the number of distinct hashes, the total number of functions, or the
/// number of functions that share their hash with at least one other
/// function.
size_t StableFunctionMap::size(SizeType Type) const {
  // Sum the sizes of all hash groups holding at least MinGroupSize functions.
  auto CountFuncsInGroupsOfAtLeast = [this](size_t MinGroupSize) {
    size_t Total = 0;
    for (const auto &Group : HashToFuncs) {
      const size_t GroupSize = Group.second.size();
      if (GroupSize >= MinGroupSize)
        Total += GroupSize;
    }
    return Total;
  };

  switch (Type) {
  case UniqueHashCount:
    return HashToFuncs.size();
  case TotalFunctionCount:
    return CountFuncsInGroupsOfAtLeast(0);
  case MergeableFunctionCount:
    // A function is mergeable only if another function has the same hash.
    return CountFuncsInGroupsOfAtLeast(2);
  }
  llvm_unreachable("Unhandled size type");
}
90+
91+
using ParamLocs = SmallVector<IndexPair>;
92+
/// Drop every skipped-operand location whose hash is the same in all
/// functions of \p SFS.
///
/// The first function acts as the reference. Callers must pass a non-empty
/// group in which every function has an entry for each location of the first
/// function.
static void removeIdenticalIndexPair(
    SmallVector<std::unique_ptr<StableFunctionMap::StableFunctionEntry>> &SFS) {
  const auto &RootMap = *SFS[0]->IndexOperandHashMap;

  // Collect first and erase afterwards: RootMap must not change while it is
  // being iterated.
  SmallVector<IndexPair> Redundant;
  for (const auto &Entry : RootMap) {
    const IndexPair &Loc = Entry.first;
    const stable_hash RootHash = Entry.second;

    // Stop scanning the remaining functions at the first mismatch.
    bool Differs = false;
    for (auto It = SFS.begin() + 1, End = SFS.end(); It != End && !Differs;
         ++It)
      Differs = (*It)->IndexOperandHashMap->at(Loc) != RootHash;

    // No need to parameterize a location whose hash never varies.
    if (!Differs)
      Redundant.push_back(Loc);
  }

  for (const IndexPair &Loc : Redundant)
    for (auto &SF : SFS)
      SF->IndexOperandHashMap->erase(Loc);
}
119+
120+
void StableFunctionMap::finalize() {
121+
for (auto It = HashToFuncs.begin(); It != HashToFuncs.end(); ++It) {
122+
auto &[StableHash, SFS] = *It;
123+
124+
// Group stable functions by ModuleIdentifier.
125+
std::stable_sort(SFS.begin(), SFS.end(),
126+
[&](const std::unique_ptr<StableFunctionEntry> &L,
127+
const std::unique_ptr<StableFunctionEntry> &R) {
128+
return *getNameForId(L->ModuleNameId) <
129+
*getNameForId(R->ModuleNameId);
130+
});
131+
132+
// Consider the first function as the root function.
133+
auto &RSF = SFS[0];
134+
135+
bool Invalid = false;
136+
unsigned StableFunctionCount = SFS.size();
137+
for (unsigned I = 1; I < StableFunctionCount; ++I) {
138+
auto &SF = SFS[I];
139+
assert(RSF->Hash == SF->Hash);
140+
if (RSF->InstCount != SF->InstCount) {
141+
Invalid = true;
142+
break;
143+
}
144+
if (RSF->IndexOperandHashMap->size() != SF->IndexOperandHashMap->size()) {
145+
Invalid = true;
146+
break;
147+
}
148+
for (auto &P : *RSF->IndexOperandHashMap) {
149+
auto &InstOpndIndex = P.first;
150+
if (!SF->IndexOperandHashMap->count(InstOpndIndex)) {
151+
Invalid = true;
152+
break;
153+
}
154+
}
155+
}
156+
if (Invalid) {
157+
HashToFuncs.erase(It);
158+
continue;
159+
}
160+
161+
// Trim the index pair that has the same operand hash across
162+
// stable functions.
163+
removeIdenticalIndexPair(SFS);
164+
}
165+
166+
Finalized = true;
167+
}

0 commit comments

Comments
 (0)