Skip to content

Commit

Permalink
[CGData] Stable Function Map
Browse files Browse the repository at this point in the history
These define the main data structures to represent stable functions and group
similar functions in a function map.
Serialization is supported in a binary or yaml form.
  • Loading branch information
kyulee-com committed Oct 17, 2024
1 parent 6225d74 commit e7272c3
Show file tree
Hide file tree
Showing 8 changed files with 848 additions and 0 deletions.
139 changes: 139 additions & 0 deletions llvm/include/llvm/CGData/StableFunctionMap.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
//===- StableFunctionMap.h -------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===---------------------------------------------------------------------===//
//
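// This file defines the StableFunction, StableFunctionEntry, and
// StableFunctionMap data structures, which represent stable functions and
// group functions that share a stable hash in a function map.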
//
//===---------------------------------------------------------------------===//

#ifndef LLVM_CGDATA_STABLEFUNCTIONMAP_H
#define LLVM_CGDATA_STABLEFUNCTIONMAP_H

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StableHashing.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/IR/StructuralHash.h"
#include "llvm/ObjectYAML/YAML.h"
#include "llvm/Support/raw_ostream.h"

#include <unordered_map>
#include <vector>

namespace llvm {

/// An IndexPair paired with the stable hash of the operand that was skipped
/// at that location.
using IndexPairHash = std::pair<IndexPair, stable_hash>;
/// A vector of IndexPairHash entries, as stored in a StableFunction.
using IndexOperandHashVecType = SmallVector<IndexPairHash>;

/// A stable function is a function with a stable hash while tracking the
/// locations of ignored operands and their hashes.
struct StableFunction {
  /// The combined stable hash of the function.
  stable_hash Hash = 0;
  /// The name of the function.
  std::string FunctionName;
  /// The name of the module the function is in.
  std::string ModuleName;
  /// The number of instructions.
  unsigned InstCount = 0;
  /// A vector of pairs of IndexPair and operand hash which was skipped.
  IndexOperandHashVecType IndexOperandHashes;

  /// Builds a stable function from its parts.
  ///
  /// The names are taken by value and moved into place, so callers passing
  /// temporaries pay for no extra copy. \p IndexOperandHashes is consumed.
  StableFunction(stable_hash Hash, std::string FunctionName,
                 std::string ModuleName, unsigned InstCount,
                 IndexOperandHashVecType &&IndexOperandHashes)
      : Hash(Hash), FunctionName(std::move(FunctionName)),
        ModuleName(std::move(ModuleName)), InstCount(InstCount),
        IndexOperandHashes(std::move(IndexOperandHashes)) {}

  /// Builds an empty stable function. The scalar members are zeroed through
  /// their default member initializers rather than left indeterminate.
  StableFunction() = default;
};

/// An efficient form of StableFunction for fast look-up
struct StableFunctionEntry {
/// The combined stable hash of the function.
stable_hash Hash;
/// Id of the function name.
unsigned FunctionNameId;
/// Id of the module name.
unsigned ModuleNameId;
/// The number of instructions.
unsigned InstCount;
/// A map from an IndexPair to a stable_hash which was skipped.
std::unique_ptr<IndexOperandHashMapType> IndexOperandHashMap;

StableFunctionEntry(
stable_hash Hash, unsigned FunctionNameId, unsigned ModuleNameId,
unsigned InstCount,
std::unique_ptr<IndexOperandHashMapType> IndexOperandHashMap)
: Hash(Hash), FunctionNameId(FunctionNameId), ModuleNameId(ModuleNameId),
InstCount(InstCount),
IndexOperandHashMap(std::move(IndexOperandHashMap)) {}
};

/// A map from a stable hash to the owned entries of the functions that have
/// that hash.
using HashFuncsMapType =
DenseMap<stable_hash, SmallVector<std::unique_ptr<StableFunctionEntry>>>;

class StableFunctionMap {
/// A map from a stable_hash to a vector of functions with that hash.
HashFuncsMapType HashToFuncs;
/// A vector of strings to hold names.
SmallVector<std::string> IdToName;
/// A map from StringRef (name) to an ID.
StringMap<unsigned> NameToId;
/// True if the function map is finalized with minimal content.
bool Finalized = false;

public:
/// Get the HashToFuncs map for serialization.
const HashFuncsMapType &getFunctionMap() const { return HashToFuncs; }

/// Get the NameToId vector for serialization.
const SmallVector<std::string> getNames() { return IdToName; }

/// Get an existing ID associated with the given name or create a new ID if it
/// doesn't exist.
unsigned getIdOrCreateForName(StringRef Name);

/// Get the name associated with a given ID
std::optional<std::string> getNameForId(unsigned Id) const;

/// Insert a `StableFunction` object into the function map. This method
/// handles the uniquing of string names and create a `StableFunctionEntry`
/// for insertion.
void insert(const StableFunction &Func);

/// Insert a `StableFunctionEntry` into the function map directly. This
/// method assumes that string names have already been uniqued and the
/// `StableFunctionEntry` is ready for insertion.
void insert(std::unique_ptr<StableFunctionEntry> FuncEntry) {
assert(!Finalized && "Cannot insert after finalization");
HashToFuncs[FuncEntry->Hash].emplace_back(std::move(FuncEntry));
}

/// Merge a \p OtherMap into this function map.
void merge(const StableFunctionMap &OtherMap);

/// \returns true if there is no stable function entry.
bool empty() { return size() == 0; }

enum SizeType {
UniqueHashCount, // The number of unique hashes in HashToFuncs.
TotalFunctionCount, // The number of total functions in HashToFuncs.
MergeableFunctionCount, // The number of functions that can be merged based
// on their hash.
};

/// \returns the size of StableFunctionMap.
/// \p Type is the type of size to return.
size_t size(SizeType Type = UniqueHashCount) const;

/// Finalize the stable function map by trimming content.
void finalize();
};

} // namespace llvm

#endif
64 changes: 64 additions & 0 deletions llvm/include/llvm/CGData/StableFunctionMapRecord.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
//===- StableFunctionMapRecord.h -------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===---------------------------------------------------------------------===//
//
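// This file defines StableFunctionMapRecord, which wraps a StableFunctionMap
// and declares its serialization in binary and YAML forms.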
//
//===---------------------------------------------------------------------===//

#ifndef LLVM_CGDATA_STABLEFUNCTIONMAPRECORD_H
#define LLVM_CGDATA_STABLEFUNCTIONMAPRECORD_H

#include "llvm/CGData/StableFunctionMap.h"

#include <unordered_map>
#include <vector>

namespace llvm {

/// Owns a StableFunctionMap and layers serialization, merging, finalization
/// and printing on top of it.
struct StableFunctionMapRecord {
  /// The wrapped function map.
  std::unique_ptr<StableFunctionMap> FunctionMap;

  /// Creates a record that holds a newly default-constructed function map.
  StableFunctionMapRecord()
      : FunctionMap(std::make_unique<StableFunctionMap>()) {}

  /// Creates a record that takes ownership of \p FunctionMap.
  StableFunctionMapRecord(std::unique_ptr<StableFunctionMap> FunctionMap)
      : FunctionMap(std::move(FunctionMap)) {}

  /// Write the stable function map to \p OS.
  void serialize(raw_ostream &OS) const;

  /// Read the stable function map from the bytes at \p Ptr.
  /// NOTE(review): \p Ptr is taken by reference, presumably so it can be
  /// advanced past the consumed bytes -- confirm against the definition.
  void deserialize(const unsigned char *&Ptr);

  /// Write the stable function map to the YAML output \p YOS.
  void serializeYAML(yaml::Output &YOS) const;

  /// Read the stable function map from the YAML input \p YIS.
  void deserializeYAML(yaml::Input &YIS);

  /// Forward to StableFunctionMap::finalize() on the wrapped map.
  void finalize() { FunctionMap->finalize(); }

  /// Merge the function map held by \p Other into the wrapped map.
  void merge(const StableFunctionMapRecord &Other) {
    FunctionMap->merge(*Other.FunctionMap);
  }

  /// \returns true if the wrapped function map is empty.
  bool empty() const { return FunctionMap->empty(); }

  /// Emit the stable function map to \p OS in YAML form.
  void print(raw_ostream &OS = llvm::errs()) const {
    yaml::Output YOS(OS);
    serializeYAML(YOS);
  }
};

} // namespace llvm

#endif
2 changes: 2 additions & 0 deletions llvm/lib/CGData/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ add_llvm_component_library(LLVMCGData
CodeGenDataWriter.cpp
OutlinedHashTree.cpp
OutlinedHashTreeRecord.cpp
StableFunctionMap.cpp
StableFunctionMapRecord.cpp

ADDITIONAL_HEADER_DIRS
${LLVM_MAIN_INCLUDE_DIR}/llvm/CGData
Expand Down
167 changes: 167 additions & 0 deletions llvm/lib/CGData/StableFunctionMap.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
//===-- StableFunctionMap.cpp ---------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
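// This file implements the StableFunctionMap class: name uniquing, insertion,
// merging, size queries, and finalization.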
//
//===----------------------------------------------------------------------===//

#include "llvm/CGData/StableFunctionMap.h"

#define DEBUG_TYPE "stable-function-map"

using namespace llvm;

/// Returns the ID already recorded for \p Name; otherwise records \p Name
/// under the next ID and returns that ID.
unsigned StableFunctionMap::getIdOrCreateForName(StringRef Name) {
  // Known name: hand back the ID recorded earlier.
  auto Existing = NameToId.find(Name);
  if (Existing != NameToId.end())
    return Existing->second;

  // New name: the next ID is the number of names stored so far.
  const unsigned NewId = IdToName.size();
  assert(NewId == NameToId.size() && "ID collision");
  IdToName.emplace_back(Name.str());
  NameToId[IdToName.back()] = NewId;
  return NewId;
}

/// Returns a copy of the name stored under \p Id, or std::nullopt when \p Id
/// is past the end of the stored names.
std::optional<std::string> StableFunctionMap::getNameForId(unsigned Id) const {
  const bool IsKnownId = Id < IdToName.size();
  if (IsKnownId)
    return IdToName[Id];
  return std::nullopt;
}

void StableFunctionMap::insert(const StableFunction &Func) {
assert(!Finalized && "Cannot insert after finalization");
auto FuncNameId = getIdOrCreateForName(Func.FunctionName);
auto ModuleNameId = getIdOrCreateForName(Func.ModuleName);
auto IndexOperandHashMap = std::make_unique<IndexOperandHashMapType>();
for (auto &[Index, Hash] : Func.IndexOperandHashes)
(*IndexOperandHashMap)[Index] = Hash;
auto FuncEntry = std::make_unique<StableFunctionEntry>(
Func.Hash, FuncNameId, ModuleNameId, Func.InstCount,
std::move(IndexOperandHashMap));
insert(std::move(FuncEntry));
}

/// Copies every entry of \p OtherMap into this map, translating the name IDs
/// of \p OtherMap into IDs of this map.
void StableFunctionMap::merge(const StableFunctionMap &OtherMap) {
  assert(!Finalized && "Cannot merge after finalization");
  for (auto &HashAndFuncs : OtherMap.HashToFuncs) {
    auto &Dest = HashToFuncs[HashAndFuncs.first];
    for (auto &Src : HashAndFuncs.second) {
      // IDs are local to each map, so go through the name to find (or
      // create) the matching ID here.
      const unsigned FuncNameId =
          getIdOrCreateForName(*OtherMap.getNameForId(Src->FunctionNameId));
      const unsigned ModuleNameId =
          getIdOrCreateForName(*OtherMap.getNameForId(Src->ModuleNameId));

      // The source map is const, so the operand-hash map is deep-copied.
      auto OperandHashesCopy =
          std::make_unique<IndexOperandHashMapType>(*Src->IndexOperandHashMap);
      auto Clone = std::make_unique<StableFunctionEntry>(
          Src->Hash, FuncNameId, ModuleNameId, Src->InstCount,
          std::move(OperandHashesCopy));
      Dest.emplace_back(std::move(Clone));
    }
  }
}

/// Returns the size of the map as selected by \p Type: the number of hash
/// buckets, the number of functions across all buckets, or the number of
/// functions in buckets that hold at least two functions. Any other value of
/// \p Type yields 0.
size_t StableFunctionMap::size(SizeType Type) const {
  if (Type == UniqueHashCount)
    return HashToFuncs.size();
  if (Type != TotalFunctionCount && Type != MergeableFunctionCount)
    return 0;

  // Both remaining kinds sum bucket sizes; the mergeable count only admits
  // buckets with two or more functions.
  const size_t MinBucketSize = Type == MergeableFunctionCount ? 2 : 0;
  size_t Count = 0;
  for (auto &Bucket : HashToFuncs) {
    const size_t NumFuncs = Bucket.second.size();
    if (NumFuncs >= MinBucketSize)
      Count += NumFuncs;
  }
  return Count;
}

using ParamLocs = SmallVector<IndexPair>;

/// Drops, from every function in \p SFS, each IndexPair whose operand hash is
/// the same in all of them. The first function supplies the set of pairs to
/// examine; \p SFS must not be empty.
static void removeIdenticalIndexPair(
    SmallVector<std::unique_ptr<StableFunctionEntry>> &SFS) {
  auto &Root = SFS[0];
  const unsigned NumFuncs = SFS.size();

  // Collect first and erase afterwards, so the root's map is not modified
  // while it is being iterated.
  SmallVector<IndexPair> Redundant;
  for (auto &Entry : *Root->IndexOperandHashMap) {
    const auto &Pair = Entry.first;
    const auto &RootHash = Entry.second;

    // Advance until some function disagrees with the root on this pair.
    unsigned J = 1;
    for (; J < NumFuncs; ++J) {
      auto &Other = SFS[J];
      assert(Other->IndexOperandHashMap->count(Pair));
      if ((*Other->IndexOperandHashMap)[Pair] != RootHash)
        break;
    }

    // Reaching the end means the hash is identical everywhere, so there is
    // no need to parameterize this pair.
    if (J == NumFuncs)
      Redundant.emplace_back(Pair);
  }

  for (auto &SF : SFS)
    for (auto &Pair : Redundant)
      SF->IndexOperandHashMap->erase(Pair);
}

void StableFunctionMap::finalize() {
Finalized = true;

for (auto It = HashToFuncs.begin(); It != HashToFuncs.end(); ++It) {
auto &[StableHash, SFS] = *It;

// Group stable functions by ModuleIdentifier.
std::stable_sort(SFS.begin(), SFS.end(),
[&](const std::unique_ptr<StableFunctionEntry> &L,
const std::unique_ptr<StableFunctionEntry> &R) {
return *getNameForId(L->ModuleNameId) <
*getNameForId(R->ModuleNameId);
});

// Consider the first function as the root function.
auto &RSF = SFS[0];

bool IsValid = true;
unsigned StableFunctionCount = SFS.size();
for (unsigned I = 1; I < StableFunctionCount; ++I) {
auto &SF = SFS[I];
assert(RSF->Hash == SF->Hash);
if (RSF->InstCount != SF->InstCount) {
IsValid = false;
break;
}
if (RSF->IndexOperandHashMap->size() != SF->IndexOperandHashMap->size()) {
IsValid = false;
break;
}
for (auto &P : *RSF->IndexOperandHashMap) {
auto &InstOpndIndex = P.first;
if (!SF->IndexOperandHashMap->count(InstOpndIndex)) {
IsValid = false;
break;
}
}
}
if (!IsValid) {
HashToFuncs.erase(It);
continue;
}

// Trim the index pair that has the same operand hash across
// stable functions.
removeIdenticalIndexPair(SFS);
}
}
Loading

0 comments on commit e7272c3

Please sign in to comment.