Skip to content

Commit e7272c3

Browse files
committed
[CGData] Stable Function Map
These define the main data structures to represent stable functions and group similar functions in a function map. Serialization is supported in a binary or yaml form.
1 parent 6225d74 commit e7272c3

8 files changed

+848
-0
lines changed

Diff for: llvm/include/llvm/CGData/StableFunctionMap.h

+139
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
//===- StableFunctionMap.h -------------------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===---------------------------------------------------------------------===//
8+
//
9+
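// This file defines StableFunction, StableFunctionEntry and StableFunctionMap,
// the data structures used to represent stable functions and to group
// functions that share a stable hash in a function map.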
10+
//
11+
//===---------------------------------------------------------------------===//
12+
13+
#ifndef LLVM_CGDATA_STABLEFUNCTIONMAP_H
14+
#define LLVM_CGDATA_STABLEFUNCTIONMAP_H
15+
16+
#include "llvm/ADT/DenseMap.h"
17+
#include "llvm/ADT/StableHashing.h"
18+
#include "llvm/ADT/StringMap.h"
19+
#include "llvm/IR/StructuralHash.h"
20+
#include "llvm/ObjectYAML/YAML.h"
21+
#include "llvm/Support/raw_ostream.h"
22+
23+
#include <unordered_map>
24+
#include <vector>
25+
26+
namespace llvm {
27+
28+
/// An IndexPair together with the stable hash of the operand it refers to.
using IndexPairHash = std::pair<IndexPair, stable_hash>;
29+
/// A sequence of (IndexPair, operand hash) entries; StableFunction uses it to
/// record the operands that were skipped.
using IndexOperandHashVecType = SmallVector<IndexPairHash>;
30+
31+
/// A stable function is a function with a stable hash while tracking the
32+
/// locations of ignored operands and their hashes.
33+
struct StableFunction {
34+
/// The combined stable hash of the function.
35+
stable_hash Hash;
36+
/// The name of the function.
37+
std::string FunctionName;
38+
/// The name of the module the function is in.
39+
std::string ModuleName;
40+
/// The number of instructions.
41+
unsigned InstCount;
42+
/// A vector of pairs of IndexPair and operand hash which was skipped.
43+
IndexOperandHashVecType IndexOperandHashes;
44+
45+
StableFunction(stable_hash Hash, const std::string FunctionName,
46+
const std::string ModuleName, unsigned InstCount,
47+
IndexOperandHashVecType &&IndexOperandHashes)
48+
: Hash(Hash), FunctionName(FunctionName), ModuleName(ModuleName),
49+
InstCount(InstCount),
50+
IndexOperandHashes(std::move(IndexOperandHashes)) {}
51+
StableFunction() = default;
52+
};
53+
54+
/// An efficient form of StableFunction for fast look-up
55+
struct StableFunctionEntry {
56+
/// The combined stable hash of the function.
57+
stable_hash Hash;
58+
/// Id of the function name.
59+
unsigned FunctionNameId;
60+
/// Id of the module name.
61+
unsigned ModuleNameId;
62+
/// The number of instructions.
63+
unsigned InstCount;
64+
/// A map from an IndexPair to a stable_hash which was skipped.
65+
std::unique_ptr<IndexOperandHashMapType> IndexOperandHashMap;
66+
67+
StableFunctionEntry(
68+
stable_hash Hash, unsigned FunctionNameId, unsigned ModuleNameId,
69+
unsigned InstCount,
70+
std::unique_ptr<IndexOperandHashMapType> IndexOperandHashMap)
71+
: Hash(Hash), FunctionNameId(FunctionNameId), ModuleNameId(ModuleNameId),
72+
InstCount(InstCount),
73+
IndexOperandHashMap(std::move(IndexOperandHashMap)) {}
74+
};
75+
76+
using HashFuncsMapType =
77+
DenseMap<stable_hash, SmallVector<std::unique_ptr<StableFunctionEntry>>>;
78+
79+
class StableFunctionMap {
80+
/// A map from a stable_hash to a vector of functions with that hash.
81+
HashFuncsMapType HashToFuncs;
82+
/// A vector of strings to hold names.
83+
SmallVector<std::string> IdToName;
84+
/// A map from StringRef (name) to an ID.
85+
StringMap<unsigned> NameToId;
86+
/// True if the function map is finalized with minimal content.
87+
bool Finalized = false;
88+
89+
public:
90+
/// Get the HashToFuncs map for serialization.
91+
const HashFuncsMapType &getFunctionMap() const { return HashToFuncs; }
92+
93+
/// Get the NameToId vector for serialization.
94+
const SmallVector<std::string> getNames() { return IdToName; }
95+
96+
/// Get an existing ID associated with the given name or create a new ID if it
97+
/// doesn't exist.
98+
unsigned getIdOrCreateForName(StringRef Name);
99+
100+
/// Get the name associated with a given ID
101+
std::optional<std::string> getNameForId(unsigned Id) const;
102+
103+
/// Insert a `StableFunction` object into the function map. This method
104+
/// handles the uniquing of string names and create a `StableFunctionEntry`
105+
/// for insertion.
106+
void insert(const StableFunction &Func);
107+
108+
/// Insert a `StableFunctionEntry` into the function map directly. This
109+
/// method assumes that string names have already been uniqued and the
110+
/// `StableFunctionEntry` is ready for insertion.
111+
void insert(std::unique_ptr<StableFunctionEntry> FuncEntry) {
112+
assert(!Finalized && "Cannot insert after finalization");
113+
HashToFuncs[FuncEntry->Hash].emplace_back(std::move(FuncEntry));
114+
}
115+
116+
/// Merge a \p OtherMap into this function map.
117+
void merge(const StableFunctionMap &OtherMap);
118+
119+
/// \returns true if there is no stable function entry.
120+
bool empty() { return size() == 0; }
121+
122+
enum SizeType {
123+
UniqueHashCount, // The number of unique hashes in HashToFuncs.
124+
TotalFunctionCount, // The number of total functions in HashToFuncs.
125+
MergeableFunctionCount, // The number of functions that can be merged based
126+
// on their hash.
127+
};
128+
129+
/// \returns the size of StableFunctionMap.
130+
/// \p Type is the type of size to return.
131+
size_t size(SizeType Type = UniqueHashCount) const;
132+
133+
/// Finalize the stable function map by trimming content.
134+
void finalize();
135+
};
136+
137+
} // namespace llvm
138+
139+
#endif

Diff for: llvm/include/llvm/CGData/StableFunctionMapRecord.h

+64
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
//===- StableFunctionMapRecord.h -------------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===---------------------------------------------------------------------===//
8+
//
9+
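// This file defines StableFunctionMapRecord, which owns a StableFunctionMap
// and declares its serialization to and from binary and YAML forms.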
10+
//
11+
//===---------------------------------------------------------------------===//
12+
13+
#ifndef LLVM_CGDATA_STABLEFUNCTIONMAPRECORD_H
14+
#define LLVM_CGDATA_STABLEFUNCTIONMAPRECORD_H
15+
16+
#include "llvm/CGData/StableFunctionMap.h"
17+
18+
#include <unordered_map>
19+
#include <vector>
20+
21+
namespace llvm {
22+
23+
struct StableFunctionMapRecord {
24+
std::unique_ptr<StableFunctionMap> FunctionMap;
25+
26+
StableFunctionMapRecord() {
27+
FunctionMap = std::make_unique<StableFunctionMap>();
28+
}
29+
StableFunctionMapRecord(std::unique_ptr<StableFunctionMap> FunctionMap)
30+
: FunctionMap(std::move(FunctionMap)) {}
31+
32+
/// Serialize the stable function map to a raw_ostream.
33+
void serialize(raw_ostream &OS) const;
34+
35+
/// Deserialize the stable function map from a raw_ostream.
36+
void deserialize(const unsigned char *&Ptr);
37+
38+
/// Serialize the stable function map to a YAML stream.
39+
void serializeYAML(yaml::Output &YOS) const;
40+
41+
/// Deserialize the stable function map from a YAML stream.
42+
void deserializeYAML(yaml::Input &YIS);
43+
44+
/// Finalize the stable function map by trimming content.
45+
void finalize() { FunctionMap->finalize(); }
46+
47+
/// Merge the stable function map into this one.
48+
void merge(const StableFunctionMapRecord &Other) {
49+
FunctionMap->merge(*Other.FunctionMap);
50+
}
51+
52+
/// \returns true if the stable function map is empty.
53+
bool empty() const { return FunctionMap->empty(); }
54+
55+
/// Print the stable function map in a YAML format.
56+
void print(raw_ostream &OS = llvm::errs()) const {
57+
yaml::Output YOS(OS);
58+
serializeYAML(YOS);
59+
}
60+
};
61+
62+
} // namespace llvm
63+
64+
#endif

Diff for: llvm/lib/CGData/CMakeLists.txt

+2
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ add_llvm_component_library(LLVMCGData
44
CodeGenDataWriter.cpp
55
OutlinedHashTree.cpp
66
OutlinedHashTreeRecord.cpp
7+
StableFunctionMap.cpp
8+
StableFunctionMapRecord.cpp
79

810
ADDITIONAL_HEADER_DIRS
911
${LLVM_MAIN_INCLUDE_DIR}/llvm/CGData

Diff for: llvm/lib/CGData/StableFunctionMap.cpp

+167
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
//===-- StableFunctionMap.cpp ---------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
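// This file implements StableFunctionMap: name-to-ID uniquing, insertion,
// merging, size queries and finalization of the function map.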
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
#include "llvm/CGData/StableFunctionMap.h"
14+
15+
#define DEBUG_TYPE "stable-function-map"
16+
17+
using namespace llvm;
18+
19+
/// Return the ID already assigned to \p Name. If there is none, assign the
/// next free ID (the current length of IdToName), record the name in both
/// tables and return that ID.
unsigned StableFunctionMap::getIdOrCreateForName(StringRef Name) {
  auto Existing = NameToId.find(Name);
  if (Existing != NameToId.end())
    return Existing->second;

  // Unknown name: its ID is the position it is about to take in IdToName.
  const unsigned NewId = IdToName.size();
  assert(NewId == NameToId.size() && "ID collision");
  IdToName.emplace_back(Name.str());
  NameToId[IdToName.back()] = NewId;
  return NewId;
}
31+
32+
std::optional<std::string> StableFunctionMap::getNameForId(unsigned Id) const {
33+
if (Id >= IdToName.size())
34+
return std::nullopt;
35+
return IdToName[Id];
36+
}
37+
38+
void StableFunctionMap::insert(const StableFunction &Func) {
39+
assert(!Finalized && "Cannot insert after finalization");
40+
auto FuncNameId = getIdOrCreateForName(Func.FunctionName);
41+
auto ModuleNameId = getIdOrCreateForName(Func.ModuleName);
42+
auto IndexOperandHashMap = std::make_unique<IndexOperandHashMapType>();
43+
for (auto &[Index, Hash] : Func.IndexOperandHashes)
44+
(*IndexOperandHashMap)[Index] = Hash;
45+
auto FuncEntry = std::make_unique<StableFunctionEntry>(
46+
Func.Hash, FuncNameId, ModuleNameId, Func.InstCount,
47+
std::move(IndexOperandHashMap));
48+
insert(std::move(FuncEntry));
49+
}
50+
51+
void StableFunctionMap::merge(const StableFunctionMap &OtherMap) {
52+
assert(!Finalized && "Cannot merge after finalization");
53+
for (auto &[Hash, Funcs] : OtherMap.HashToFuncs) {
54+
auto &ThisFuncs = HashToFuncs[Hash];
55+
for (auto &Func : Funcs) {
56+
auto FuncNameId =
57+
getIdOrCreateForName(*OtherMap.getNameForId(Func->FunctionNameId));
58+
auto ModuleNameId =
59+
getIdOrCreateForName(*OtherMap.getNameForId(Func->ModuleNameId));
60+
auto ClonedIndexOperandHashMap =
61+
std::make_unique<IndexOperandHashMapType>(*Func->IndexOperandHashMap);
62+
ThisFuncs.emplace_back(std::make_unique<StableFunctionEntry>(
63+
Func->Hash, FuncNameId, ModuleNameId, Func->InstCount,
64+
std::move(ClonedIndexOperandHashMap)));
65+
}
66+
}
67+
}
68+
69+
/// Return the size of the map as selected by \p Type: the number of distinct
/// hashes, the number of functions over all hashes, or the number of
/// functions that share their hash with at least one other function. Any
/// other value of \p Type yields 0.
size_t StableFunctionMap::size(SizeType Type) const {
  size_t Total = 0;
  switch (Type) {
  case UniqueHashCount:
    Total = HashToFuncs.size();
    break;
  case TotalFunctionCount:
    for (const auto &Bucket : HashToFuncs)
      Total += Bucket.second.size();
    break;
  case MergeableFunctionCount:
    // Only buckets holding two or more functions contribute.
    for (const auto &Bucket : HashToFuncs) {
      const size_t GroupSize = Bucket.second.size();
      if (GroupSize > 1)
        Total += GroupSize;
    }
    break;
  }
  return Total;
}
89+
90+
using ParamLocs = SmallVector<IndexPair>;
91+
static void removeIdenticalIndexPair(
92+
SmallVector<std::unique_ptr<StableFunctionEntry>> &SFS) {
93+
auto &RSF = SFS[0];
94+
unsigned StableFunctionCount = SFS.size();
95+
96+
SmallVector<IndexPair> ToDelete;
97+
for (auto &[Pair, Hash] : *(RSF->IndexOperandHashMap)) {
98+
bool Identical = true;
99+
for (unsigned J = 1; J < StableFunctionCount; ++J) {
100+
auto &SF = SFS[J];
101+
assert(SF->IndexOperandHashMap->count(Pair));
102+
auto SHash = (*SF->IndexOperandHashMap)[Pair];
103+
if (Hash != SHash) {
104+
Identical = false;
105+
break;
106+
}
107+
}
108+
109+
// No need to parameterize them if the hashes are identical across stable
110+
// functions.
111+
if (Identical)
112+
ToDelete.emplace_back(Pair);
113+
}
114+
115+
for (auto &Pair : ToDelete)
116+
for (auto &SF : SFS)
117+
SF->IndexOperandHashMap->erase(Pair);
118+
}
119+
120+
void StableFunctionMap::finalize() {
121+
Finalized = true;
122+
123+
for (auto It = HashToFuncs.begin(); It != HashToFuncs.end(); ++It) {
124+
auto &[StableHash, SFS] = *It;
125+
126+
// Group stable functions by ModuleIdentifier.
127+
std::stable_sort(SFS.begin(), SFS.end(),
128+
[&](const std::unique_ptr<StableFunctionEntry> &L,
129+
const std::unique_ptr<StableFunctionEntry> &R) {
130+
return *getNameForId(L->ModuleNameId) <
131+
*getNameForId(R->ModuleNameId);
132+
});
133+
134+
// Consider the first function as the root function.
135+
auto &RSF = SFS[0];
136+
137+
bool IsValid = true;
138+
unsigned StableFunctionCount = SFS.size();
139+
for (unsigned I = 1; I < StableFunctionCount; ++I) {
140+
auto &SF = SFS[I];
141+
assert(RSF->Hash == SF->Hash);
142+
if (RSF->InstCount != SF->InstCount) {
143+
IsValid = false;
144+
break;
145+
}
146+
if (RSF->IndexOperandHashMap->size() != SF->IndexOperandHashMap->size()) {
147+
IsValid = false;
148+
break;
149+
}
150+
for (auto &P : *RSF->IndexOperandHashMap) {
151+
auto &InstOpndIndex = P.first;
152+
if (!SF->IndexOperandHashMap->count(InstOpndIndex)) {
153+
IsValid = false;
154+
break;
155+
}
156+
}
157+
}
158+
if (!IsValid) {
159+
HashToFuncs.erase(It);
160+
continue;
161+
}
162+
163+
// Trim the index pair that has the same operand hash across
164+
// stable functions.
165+
removeIdenticalIndexPair(SFS);
166+
}
167+
}

0 commit comments

Comments
 (0)