Skip to content

Commit 8e10ed3

Browse files
committed
[CGData] Stable Function Map
This commit defines the main data structures that represent stable functions and group similar functions in a function map. Serialization is supported in both binary and YAML form.
1 parent 0dd9fdc commit 8e10ed3

8 files changed

+864
-0
lines changed

Diff for: llvm/include/llvm/CGData/StableFunctionMap.h

+136
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
//===- StableFunctionMap.h -------------------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===---------------------------------------------------------------------===//
8+
//
9+
// This defines the StableFunctionMap class, to track similar functions.
10+
// It provides a mechanism to map stable hashes of functions to their
11+
// corresponding metadata. It includes structures for storing function details
12+
// and methods for managing and querying these mappings.
13+
//
14+
//===---------------------------------------------------------------------===//
15+
16+
#ifndef LLVM_CGDATA_STABLEFUNCTIONMAP_H
17+
#define LLVM_CGDATA_STABLEFUNCTIONMAP_H
18+
19+
#include "llvm/ADT/DenseMap.h"
20+
#include "llvm/ADT/StringMap.h"
21+
#include "llvm/IR/StructuralHash.h"
22+
23+
namespace llvm {
24+
25+
/// An IndexPair together with the stable hash of the operand that was skipped
/// at that location.
using IndexPairHash = std::pair<IndexPair, stable_hash>;
26+
/// A sequence of IndexPairHash entries; StableFunction stores its skipped
/// operand hashes in this form.
using IndexOperandHashVecType = SmallVector<IndexPairHash>;
27+
28+
/// A stable function is a function with a stable hash while tracking the
29+
/// locations of ignored operands and their hashes.
30+
struct StableFunction {
31+
/// The combined stable hash of the function.
32+
stable_hash Hash;
33+
/// The name of the function.
34+
std::string FunctionName;
35+
/// The name of the module the function is in.
36+
std::string ModuleName;
37+
/// The number of instructions.
38+
unsigned InstCount;
39+
/// A vector of pairs of IndexPair and operand hash which was skipped.
40+
IndexOperandHashVecType IndexOperandHashes;
41+
42+
StableFunction(stable_hash Hash, const std::string FunctionName,
43+
const std::string ModuleName, unsigned InstCount,
44+
IndexOperandHashVecType &&IndexOperandHashes)
45+
: Hash(Hash), FunctionName(FunctionName), ModuleName(ModuleName),
46+
InstCount(InstCount),
47+
IndexOperandHashes(std::move(IndexOperandHashes)) {}
48+
StableFunction() = default;
49+
};
50+
51+
/// An efficient form of StableFunction for fast look-up
52+
struct StableFunctionEntry {
53+
/// The combined stable hash of the function.
54+
stable_hash Hash;
55+
/// Id of the function name.
56+
unsigned FunctionNameId;
57+
/// Id of the module name.
58+
unsigned ModuleNameId;
59+
/// The number of instructions.
60+
unsigned InstCount;
61+
/// A map from an IndexPair to a stable_hash which was skipped.
62+
std::unique_ptr<IndexOperandHashMapType> IndexOperandHashMap;
63+
64+
StableFunctionEntry(
65+
stable_hash Hash, unsigned FunctionNameId, unsigned ModuleNameId,
66+
unsigned InstCount,
67+
std::unique_ptr<IndexOperandHashMapType> IndexOperandHashMap)
68+
: Hash(Hash), FunctionNameId(FunctionNameId), ModuleNameId(ModuleNameId),
69+
InstCount(InstCount),
70+
IndexOperandHashMap(std::move(IndexOperandHashMap)) {}
71+
};
72+
73+
/// Maps a stable hash to the owned entries of all functions that share that
/// hash.
using HashFuncsMapType =
74+
DenseMap<stable_hash, SmallVector<std::unique_ptr<StableFunctionEntry>>>;
75+
76+
class StableFunctionMap {
77+
/// A map from a stable_hash to a vector of functions with that hash.
78+
HashFuncsMapType HashToFuncs;
79+
/// A vector of strings to hold names.
80+
SmallVector<std::string> IdToName;
81+
/// A map from StringRef (name) to an ID.
82+
StringMap<unsigned> NameToId;
83+
/// True if the function map is finalized with minimal content.
84+
bool Finalized = false;
85+
86+
public:
87+
/// Get the HashToFuncs map for serialization.
88+
const HashFuncsMapType &getFunctionMap() const { return HashToFuncs; }
89+
90+
/// Get the NameToId vector for serialization.
91+
const SmallVector<std::string> getNames() const { return IdToName; }
92+
93+
/// Get an existing ID associated with the given name or create a new ID if it
94+
/// doesn't exist.
95+
unsigned getIdOrCreateForName(StringRef Name);
96+
97+
/// Get the name associated with a given ID
98+
std::optional<std::string> getNameForId(unsigned Id) const;
99+
100+
/// Insert a `StableFunction` object into the function map. This method
101+
/// handles the uniquing of string names and create a `StableFunctionEntry`
102+
/// for insertion.
103+
void insert(const StableFunction &Func);
104+
105+
/// Insert a `StableFunctionEntry` into the function map directly. This
106+
/// method assumes that string names have already been uniqued and the
107+
/// `StableFunctionEntry` is ready for insertion.
108+
void insert(std::unique_ptr<StableFunctionEntry> FuncEntry) {
109+
assert(!Finalized && "Cannot insert after finalization");
110+
HashToFuncs[FuncEntry->Hash].emplace_back(std::move(FuncEntry));
111+
}
112+
113+
/// Merge a \p OtherMap into this function map.
114+
void merge(const StableFunctionMap &OtherMap);
115+
116+
/// \returns true if there is no stable function entry.
117+
bool empty() { return size() == 0; }
118+
119+
enum SizeType {
120+
UniqueHashCount, // The number of unique hashes in HashToFuncs.
121+
TotalFunctionCount, // The number of total functions in HashToFuncs.
122+
MergeableFunctionCount, // The number of functions that can be merged based
123+
// on their hash.
124+
};
125+
126+
/// \returns the size of StableFunctionMap.
127+
/// \p Type is the type of size to return.
128+
size_t size(SizeType Type = UniqueHashCount) const;
129+
130+
/// Finalize the stable function map by trimming content.
131+
void finalize();
132+
};
133+
134+
} // namespace llvm
135+
136+
#endif

Diff for: llvm/include/llvm/CGData/StableFunctionMapRecord.h

+71
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
//===- StableFunctionMapRecord.h -------------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===---------------------------------------------------------------------===//
8+
//
9+
// This defines the StableFunctionMapRecord structure, which provides
10+
// functionality for managing and serializing a StableFunctionMap. It includes
11+
// methods for serialization to and from raw and YAML streams, as well as
12+
// utilities for merging and finalizing function maps.
13+
//
14+
//===---------------------------------------------------------------------===//
15+
16+
#ifndef LLVM_CGDATA_STABLEFUNCTIONMAPRECORD_H
17+
#define LLVM_CGDATA_STABLEFUNCTIONMAPRECORD_H
18+
19+
#include "llvm/CGData/StableFunctionMap.h"
20+
#include "llvm/ObjectYAML/YAML.h"
21+
#include "llvm/Support/raw_ostream.h"
22+
23+
namespace llvm {
24+
25+
struct StableFunctionMapRecord {
26+
std::unique_ptr<StableFunctionMap> FunctionMap;
27+
28+
StableFunctionMapRecord() {
29+
FunctionMap = std::make_unique<StableFunctionMap>();
30+
}
31+
32+
StableFunctionMapRecord(std::unique_ptr<StableFunctionMap> FunctionMap)
33+
: FunctionMap(std::move(FunctionMap)) {}
34+
35+
/// A static helper function to serialize the stable function map without
36+
/// owning the stable function map.
37+
static void serialize(raw_ostream &OS, const StableFunctionMap *FunctionMap);
38+
39+
/// Serialize the stable function map to a raw_ostream.
40+
void serialize(raw_ostream &OS) const;
41+
42+
/// Deserialize the stable function map from a raw_ostream.
43+
void deserialize(const unsigned char *&Ptr);
44+
45+
/// Serialize the stable function map to a YAML stream.
46+
void serializeYAML(yaml::Output &YOS) const;
47+
48+
/// Deserialize the stable function map from a YAML stream.
49+
void deserializeYAML(yaml::Input &YIS);
50+
51+
/// Finalize the stable function map by trimming content.
52+
void finalize() { FunctionMap->finalize(); }
53+
54+
/// Merge the stable function map into this one.
55+
void merge(const StableFunctionMapRecord &Other) {
56+
FunctionMap->merge(*Other.FunctionMap);
57+
}
58+
59+
/// \returns true if the stable function map is empty.
60+
bool empty() const { return FunctionMap->empty(); }
61+
62+
/// Print the stable function map in a YAML format.
63+
void print(raw_ostream &OS = llvm::errs()) const {
64+
yaml::Output YOS(OS);
65+
serializeYAML(YOS);
66+
}
67+
};
68+
69+
} // namespace llvm
70+
71+
#endif

Diff for: llvm/lib/CGData/CMakeLists.txt

+2
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ add_llvm_component_library(LLVMCGData
44
CodeGenDataWriter.cpp
55
OutlinedHashTree.cpp
66
OutlinedHashTreeRecord.cpp
7+
StableFunctionMap.cpp
8+
StableFunctionMapRecord.cpp
79

810
ADDITIONAL_HEADER_DIRS
911
${LLVM_MAIN_INCLUDE_DIR}/llvm/CGData

Diff for: llvm/lib/CGData/StableFunctionMap.cpp

+170
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
//===-- StableFunctionMap.cpp ---------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This implements the functionality for the StableFunctionMap class, which
10+
// manages the mapping of stable function hashes to their metadata. It includes
11+
// methods for inserting, merging, and finalizing function entries, as well as
12+
// utilities for handling function names and IDs.
13+
//
14+
//===----------------------------------------------------------------------===//
15+
16+
#include "llvm/CGData/StableFunctionMap.h"
17+
18+
#define DEBUG_TYPE "stable-function-map"
19+
20+
using namespace llvm;
21+
22+
/// Return the ID already assigned to \p Name, or assign and return the next
/// free ID if the name has not been seen before.
unsigned StableFunctionMap::getIdOrCreateForName(StringRef Name) {
  // Known name: hand back the ID recorded for it.
  auto Existing = NameToId.find(Name);
  if (Existing != NameToId.end())
    return Existing->second;

  // New name: IDs are indices into IdToName, so the next free ID is its
  // current size.
  unsigned NewId = IdToName.size();
  assert(NewId == NameToId.size() && "ID collision");
  IdToName.emplace_back(Name.str());
  NameToId[IdToName.back()] = NewId;
  return NewId;
}
34+
35+
std::optional<std::string> StableFunctionMap::getNameForId(unsigned Id) const {
36+
if (Id >= IdToName.size())
37+
return std::nullopt;
38+
return IdToName[Id];
39+
}
40+
41+
void StableFunctionMap::insert(const StableFunction &Func) {
42+
assert(!Finalized && "Cannot insert after finalization");
43+
auto FuncNameId = getIdOrCreateForName(Func.FunctionName);
44+
auto ModuleNameId = getIdOrCreateForName(Func.ModuleName);
45+
auto IndexOperandHashMap = std::make_unique<IndexOperandHashMapType>();
46+
for (auto &[Index, Hash] : Func.IndexOperandHashes)
47+
(*IndexOperandHashMap)[Index] = Hash;
48+
auto FuncEntry = std::make_unique<StableFunctionEntry>(
49+
Func.Hash, FuncNameId, ModuleNameId, Func.InstCount,
50+
std::move(IndexOperandHashMap));
51+
insert(std::move(FuncEntry));
52+
}
53+
54+
void StableFunctionMap::merge(const StableFunctionMap &OtherMap) {
55+
assert(!Finalized && "Cannot merge after finalization");
56+
for (auto &[Hash, Funcs] : OtherMap.HashToFuncs) {
57+
auto &ThisFuncs = HashToFuncs[Hash];
58+
for (auto &Func : Funcs) {
59+
auto FuncNameId =
60+
getIdOrCreateForName(*OtherMap.getNameForId(Func->FunctionNameId));
61+
auto ModuleNameId =
62+
getIdOrCreateForName(*OtherMap.getNameForId(Func->ModuleNameId));
63+
auto ClonedIndexOperandHashMap =
64+
std::make_unique<IndexOperandHashMapType>(*Func->IndexOperandHashMap);
65+
ThisFuncs.emplace_back(std::make_unique<StableFunctionEntry>(
66+
Func->Hash, FuncNameId, ModuleNameId, Func->InstCount,
67+
std::move(ClonedIndexOperandHashMap)));
68+
}
69+
}
70+
}
71+
72+
/// \returns the size of the map measured as requested by \p Type: the number
/// of distinct hashes, the number of functions over all hashes, or the number
/// of functions whose hash is shared by at least two functions. Any other
/// value of \p Type yields 0.
size_t StableFunctionMap::size(SizeType Type) const {
  if (Type == UniqueHashCount)
    return HashToFuncs.size();

  if (Type != TotalFunctionCount && Type != MergeableFunctionCount)
    return 0;

  // Both remaining kinds sum group sizes; they differ only in the smallest
  // group that is counted. A group is mergeable once it has two functions.
  const size_t MinGroupSize = (Type == MergeableFunctionCount) ? 2 : 0;
  size_t Total = 0;
  for (auto &HashAndFuncs : HashToFuncs) {
    const size_t GroupSize = HashAndFuncs.second.size();
    if (GroupSize >= MinGroupSize)
      Total += GroupSize;
  }
  return Total;
}
92+
93+
using ParamLocs = SmallVector<IndexPair>;
94+
/// Drop from every function in \p SFS the index pairs whose operand hash is
/// the same in all of them.
///
/// The first element of \p SFS is used as the reference: each of its index
/// pairs is compared against the hash stored for the same pair in every other
/// function. Pairs that agree everywhere are erased from all functions. An
/// empty \p SFS is left untouched.
static void removeIdenticalIndexPair(
    SmallVector<std::unique_ptr<StableFunctionEntry>> &SFS) {
  if (SFS.empty())
    return;

  auto &RSF = SFS[0];
  unsigned StableFunctionCount = SFS.size();

  // Collect first and erase afterwards, so the reference map is not modified
  // while it is being iterated.
  SmallVector<IndexPair> ToDelete;
  for (auto &[Pair, Hash] : *(RSF->IndexOperandHashMap)) {
    bool Identical = true;
    for (unsigned J = 1; J < StableFunctionCount; ++J) {
      auto &OtherHashes = *SFS[J]->IndexOperandHashMap;
      const bool HasPair = OtherHashes.count(Pair) != 0;
      assert(HasPair && "Index pair missing from a grouped stable function");
      // Check presence before operator[], which would otherwise insert a
      // default hash when the assertion above is compiled out. A missing pair
      // is treated as a mismatch, so the pair is kept.
      if (!HasPair || OtherHashes[Pair] != Hash) {
        Identical = false;
        break;
      }
    }

    // No need to parameterize them if the hashes are identical across stable
    // functions.
    if (Identical)
      ToDelete.emplace_back(Pair);
  }

  for (auto &Pair : ToDelete)
    for (auto &SF : SFS)
      SF->IndexOperandHashMap->erase(Pair);
}
122+
123+
void StableFunctionMap::finalize() {
124+
Finalized = true;
125+
126+
for (auto It = HashToFuncs.begin(); It != HashToFuncs.end(); ++It) {
127+
auto &[StableHash, SFS] = *It;
128+
129+
// Group stable functions by ModuleIdentifier.
130+
std::stable_sort(SFS.begin(), SFS.end(),
131+
[&](const std::unique_ptr<StableFunctionEntry> &L,
132+
const std::unique_ptr<StableFunctionEntry> &R) {
133+
return *getNameForId(L->ModuleNameId) <
134+
*getNameForId(R->ModuleNameId);
135+
});
136+
137+
// Consider the first function as the root function.
138+
auto &RSF = SFS[0];
139+
140+
bool IsValid = true;
141+
unsigned StableFunctionCount = SFS.size();
142+
for (unsigned I = 1; I < StableFunctionCount; ++I) {
143+
auto &SF = SFS[I];
144+
assert(RSF->Hash == SF->Hash);
145+
if (RSF->InstCount != SF->InstCount) {
146+
IsValid = false;
147+
break;
148+
}
149+
if (RSF->IndexOperandHashMap->size() != SF->IndexOperandHashMap->size()) {
150+
IsValid = false;
151+
break;
152+
}
153+
for (auto &P : *RSF->IndexOperandHashMap) {
154+
auto &InstOpndIndex = P.first;
155+
if (!SF->IndexOperandHashMap->count(InstOpndIndex)) {
156+
IsValid = false;
157+
break;
158+
}
159+
}
160+
}
161+
if (!IsValid) {
162+
HashToFuncs.erase(It);
163+
continue;
164+
}
165+
166+
// Trim the index pair that has the same operand hash across
167+
// stable functions.
168+
removeIdenticalIndexPair(SFS);
169+
}
170+
}

0 commit comments

Comments
 (0)