Skip to content

Commit bbe8cd1

Browse files
committed
[LTO] Remove module id from summary index
The module paths string table mapped to both an id sequentially assigned during LTO linking, and the module hash. The former is leftover from before the module hash was added for caching and subsequently replaced use of the module id when renaming promoted symbols (to avoid affects due to link order changes). The sequentially assigned module id was not removed, however, as it was still a convenience when serializing to/from bitcode and assembly. This patch removes the module id from this table, since it isn't strictly needed and can lead to confusion on when it is appropriate to use (e.g. see fix in D156525). It also takes a (likely not significant) amount of overhead. Where an integer module id is needed (e.g. bitcode writing), one is assigned on the fly. There are a couple of test changes since the paths are now sorted alphanumerically when assigning ids on the fly during assembly writing, in order to ensure deterministic behavior. Differential Revision: https://reviews.llvm.org/D156730
1 parent b0b3f82 commit bbe8cd1

File tree

14 files changed

+110
-103
lines changed

14 files changed

+110
-103
lines changed

lld/test/COFF/thinlto-index-only.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,8 @@
4949
; RUN: llvm-ar rcsT %t5.lib %t/bar.obj %t3.obj
5050
; RUN: lld-link -thinlto-index-only -entry:main %t/foo.obj %t5.lib
5151
; RUN: llvm-dis -o - %t/foo.obj.thinlto.bc | FileCheck %s --check-prefix=THINARCHIVE
52-
; THINARCHIVE: ^0 = module: (path: "{{.*}}foo.obj",
53-
; THINARCHIVE: ^1 = module: (path: "{{.*}}bar.obj",
52+
; THINARCHIVE: ^0 = module: (path: "{{.*}}bar.obj",
53+
; THINARCHIVE: ^1 = module: (path: "{{.*}}foo.obj",
5454

5555
target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
5656
target triple = "x86_64-pc-windows-msvc19.0.24215"

llvm/include/llvm/Bitcode/BitcodeReader.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,6 @@ struct ParserCallbacks {
158158
/// into CombinedIndex.
159159
Error
160160
readSummary(ModuleSummaryIndex &CombinedIndex, StringRef ModulePath,
161-
uint64_t ModuleId,
162161
std::function<bool(GlobalValue::GUID)> IsPrevailing = nullptr);
163162
};
164163

@@ -225,8 +224,7 @@ struct ParserCallbacks {
225224

226225
/// Parse the specified bitcode buffer and merge the index into CombinedIndex.
227226
Error readModuleSummaryIndex(MemoryBufferRef Buffer,
228-
ModuleSummaryIndex &CombinedIndex,
229-
uint64_t ModuleId);
227+
ModuleSummaryIndex &CombinedIndex);
230228

231229
/// Parse the module summary index out of an IR file and return the module
232230
/// summary index object if found, or an empty summary if not. If Path refers

llvm/include/llvm/IR/ModuleSummaryIndex.h

Lines changed: 7 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1225,10 +1225,9 @@ using ModuleHash = std::array<uint32_t, 5>;
12251225
using const_gvsummary_iterator = GlobalValueSummaryMapTy::const_iterator;
12261226
using gvsummary_iterator = GlobalValueSummaryMapTy::iterator;
12271227

1228-
/// String table to hold/own module path strings, which additionally holds the
1229-
/// module ID assigned to each module during the plugin step, as well as a hash
1228+
/// String table to hold/own module path strings, as well as a hash
12301229
/// of the module. The StringMap makes a copy of and owns inserted strings.
1231-
using ModulePathStringTableTy = StringMap<std::pair<uint64_t, ModuleHash>>;
1230+
using ModulePathStringTableTy = StringMap<ModuleHash>;
12321231

12331232
/// Map of global value GUID to its summary, used to identify values defined in
12341233
/// a particular module, and provide efficient access to their summary.
@@ -1674,25 +1673,18 @@ class ModuleSummaryIndex {
16741673
bool PerModuleIndex = true) const;
16751674

16761675
/// Table of modules, containing module hash and id.
1677-
const StringMap<std::pair<uint64_t, ModuleHash>> &modulePaths() const {
1676+
const StringMap<ModuleHash> &modulePaths() const {
16781677
return ModulePathStringTable;
16791678
}
16801679

16811680
/// Table of modules, containing hash and id.
1682-
StringMap<std::pair<uint64_t, ModuleHash>> &modulePaths() {
1683-
return ModulePathStringTable;
1684-
}
1685-
1686-
/// Get the module ID recorded for the given module path.
1687-
uint64_t getModuleId(const StringRef ModPath) const {
1688-
return ModulePathStringTable.lookup(ModPath).first;
1689-
}
1681+
StringMap<ModuleHash> &modulePaths() { return ModulePathStringTable; }
16901682

16911683
/// Get the module SHA1 hash recorded for the given module path.
16921684
const ModuleHash &getModuleHash(const StringRef ModPath) const {
16931685
auto It = ModulePathStringTable.find(ModPath);
16941686
assert(It != ModulePathStringTable.end() && "Module not registered");
1695-
return It->second.second;
1687+
return It->second;
16961688
}
16971689

16981690
/// Convenience method for creating a promoted global name
@@ -1723,9 +1715,8 @@ class ModuleSummaryIndex {
17231715

17241716
/// Add a new module with the given \p Hash, mapped to the given \p
17251717
/// ModID, and return a reference to the module.
1726-
ModuleInfo *addModule(StringRef ModPath, uint64_t ModId,
1727-
ModuleHash Hash = ModuleHash{{0}}) {
1728-
return &*ModulePathStringTable.insert({ModPath, {ModId, Hash}}).first;
1718+
ModuleInfo *addModule(StringRef ModPath, ModuleHash Hash = ModuleHash{{0}}) {
1719+
return &*ModulePathStringTable.insert({ModPath, Hash}).first;
17291720
}
17301721

17311722
/// Return module entry for module with the given \p ModPath.

llvm/lib/AsmParser/LLParser.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8181,7 +8181,7 @@ bool LLParser::parseModuleEntry(unsigned ID) {
81818181
parseToken(lltok::rparen, "expected ')' here"))
81828182
return true;
81838183

8184-
auto ModuleEntry = Index->addModule(Path, ID, Hash);
8184+
auto ModuleEntry = Index->addModule(Path, Hash);
81858185
ModuleIdMap[ID] = ModuleEntry->first();
81868186

81878187
return false;

llvm/lib/Bitcode/Reader/BitcodeReader.cpp

Lines changed: 11 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -904,10 +904,6 @@ class ModuleSummaryIndexBitcodeReader : public BitcodeReaderBase {
904904
/// path to the bitcode file.
905905
StringRef ModulePath;
906906

907-
/// For per-module summary indexes, the unique numerical identifier given to
908-
/// this module by the client.
909-
unsigned ModuleId;
910-
911907
/// Callback to ask whether a symbol is the prevailing copy when invoked
912908
/// during combined index building.
913909
std::function<bool(GlobalValue::GUID)> IsPrevailing;
@@ -919,7 +915,7 @@ class ModuleSummaryIndexBitcodeReader : public BitcodeReaderBase {
919915
public:
920916
ModuleSummaryIndexBitcodeReader(
921917
BitstreamCursor Stream, StringRef Strtab, ModuleSummaryIndex &TheIndex,
922-
StringRef ModulePath, unsigned ModuleId,
918+
StringRef ModulePath,
923919
std::function<bool(GlobalValue::GUID)> IsPrevailing = nullptr);
924920

925921
Error parseModule();
@@ -6699,13 +6695,12 @@ std::vector<StructType *> BitcodeReader::getIdentifiedStructTypes() const {
66996695

67006696
ModuleSummaryIndexBitcodeReader::ModuleSummaryIndexBitcodeReader(
67016697
BitstreamCursor Cursor, StringRef Strtab, ModuleSummaryIndex &TheIndex,
6702-
StringRef ModulePath, unsigned ModuleId,
6703-
std::function<bool(GlobalValue::GUID)> IsPrevailing)
6698+
StringRef ModulePath, std::function<bool(GlobalValue::GUID)> IsPrevailing)
67046699
: BitcodeReaderBase(std::move(Cursor), Strtab), TheIndex(TheIndex),
6705-
ModulePath(ModulePath), ModuleId(ModuleId), IsPrevailing(IsPrevailing) {}
6700+
ModulePath(ModulePath), IsPrevailing(IsPrevailing) {}
67066701

67076702
void ModuleSummaryIndexBitcodeReader::addThisModule() {
6708-
TheIndex.addModule(ModulePath, ModuleId);
6703+
TheIndex.addModule(ModulePath);
67096704
}
67106705

67116706
ModuleSummaryIndex::ModuleInfo *
@@ -6936,7 +6931,7 @@ Error ModuleSummaryIndexBitcodeReader::parseModule() {
69366931
case bitc::MODULE_CODE_HASH: {
69376932
if (Record.size() != 5)
69386933
return error("Invalid hash length " + Twine(Record.size()).str());
6939-
auto &Hash = getThisModule()->second.second;
6934+
auto &Hash = getThisModule()->second;
69406935
int Pos = 0;
69416936
for (auto &Val : Record) {
69426937
assert(!(Val >> 32) && "Unexpected high bits set");
@@ -7697,7 +7692,7 @@ Error ModuleSummaryIndexBitcodeReader::parseModuleStringTable() {
76977692
if (convertToString(Record, 1, ModulePath))
76987693
return error("Invalid record");
76997694

7700-
LastSeenModule = TheIndex.addModule(ModulePath, ModuleId);
7695+
LastSeenModule = TheIndex.addModule(ModulePath);
77017696
ModuleIdMap[ModuleId] = LastSeenModule->first();
77027697

77037698
ModulePath.clear();
@@ -7712,7 +7707,7 @@ Error ModuleSummaryIndexBitcodeReader::parseModuleStringTable() {
77127707
int Pos = 0;
77137708
for (auto &Val : Record) {
77147709
assert(!(Val >> 32) && "Unexpected high bits set");
7715-
LastSeenModule->second.second[Pos++] = Val;
7710+
LastSeenModule->second[Pos++] = Val;
77167711
}
77177712
// Reset LastSeenModule to avoid overriding the hash unexpectedly.
77187713
LastSeenModule = nullptr;
@@ -7970,14 +7965,14 @@ BitcodeModule::getLazyModule(LLVMContext &Context, bool ShouldLazyLoadMetadata,
79707965
// module path used in the combined summary (e.g. when reading summaries for
79717966
// regular LTO modules).
79727967
Error BitcodeModule::readSummary(
7973-
ModuleSummaryIndex &CombinedIndex, StringRef ModulePath, uint64_t ModuleId,
7968+
ModuleSummaryIndex &CombinedIndex, StringRef ModulePath,
79747969
std::function<bool(GlobalValue::GUID)> IsPrevailing) {
79757970
BitstreamCursor Stream(Buffer);
79767971
if (Error JumpFailed = Stream.JumpToBit(ModuleBit))
79777972
return JumpFailed;
79787973

79797974
ModuleSummaryIndexBitcodeReader R(std::move(Stream), Strtab, CombinedIndex,
7980-
ModulePath, ModuleId, IsPrevailing);
7975+
ModulePath, IsPrevailing);
79817976
return R.parseModule();
79827977
}
79837978

@@ -8183,13 +8178,12 @@ Expected<std::string> llvm::getBitcodeProducerString(MemoryBufferRef Buffer) {
81838178
}
81848179

81858180
Error llvm::readModuleSummaryIndex(MemoryBufferRef Buffer,
8186-
ModuleSummaryIndex &CombinedIndex,
8187-
uint64_t ModuleId) {
8181+
ModuleSummaryIndex &CombinedIndex) {
81888182
Expected<BitcodeModule> BM = getSingleModule(Buffer);
81898183
if (!BM)
81908184
return BM.takeError();
81918185

8192-
return BM->readSummary(CombinedIndex, BM->getModuleIdentifier(), ModuleId);
8186+
return BM->readSummary(CombinedIndex, BM->getModuleIdentifier());
81938187
}
81948188

81958189
Expected<std::unique_ptr<ModuleSummaryIndex>>

llvm/lib/Bitcode/Writer/BitcodeWriter.cpp

Lines changed: 46 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -431,6 +431,10 @@ class IndexBitcodeWriter : public BitcodeWriterBase {
431431
/// Tracks the last value id recorded in the GUIDToValueMap.
432432
unsigned GlobalValueId = 0;
433433

434+
/// Tracks the assignment of module paths in the module path string table to
435+
/// an id assigned for use in summary references to the module path.
436+
DenseMap<StringRef, uint64_t> ModuleIdMap;
437+
434438
public:
435439
/// Constructs a IndexBitcodeWriter object for the given combined index,
436440
/// writing to the provided \p Buffer. When writing a subset of the index
@@ -512,8 +516,16 @@ class IndexBitcodeWriter : public BitcodeWriterBase {
512516
Callback(*MPI);
513517
}
514518
} else {
515-
for (const auto &MPSE : Index.modulePaths())
516-
Callback(MPSE);
519+
// Since StringMap iteration order isn't guaranteed, order by path string
520+
// first.
521+
// FIXME: Make this a vector of StringMapEntry instead to avoid the later
522+
// map lookup.
523+
std::vector<StringRef> ModulePaths;
524+
for (auto &[ModPath, _] : Index.modulePaths())
525+
ModulePaths.push_back(ModPath);
526+
llvm::sort(ModulePaths.begin(), ModulePaths.end());
527+
for (auto &ModPath : ModulePaths)
528+
Callback(*Index.modulePaths().find(ModPath));
517529
}
518530
}
519531

@@ -3715,33 +3727,33 @@ void IndexBitcodeWriter::writeModStrings() {
37153727
unsigned AbbrevHash = Stream.EmitAbbrev(std::move(Abbv));
37163728

37173729
SmallVector<unsigned, 64> Vals;
3718-
forEachModule(
3719-
[&](const StringMapEntry<std::pair<uint64_t, ModuleHash>> &MPSE) {
3720-
StringRef Key = MPSE.getKey();
3721-
const auto &Value = MPSE.getValue();
3722-
StringEncoding Bits = getStringEncoding(Key);
3723-
unsigned AbbrevToUse = Abbrev8Bit;
3724-
if (Bits == SE_Char6)
3725-
AbbrevToUse = Abbrev6Bit;
3726-
else if (Bits == SE_Fixed7)
3727-
AbbrevToUse = Abbrev7Bit;
3728-
3729-
Vals.push_back(Value.first);
3730-
Vals.append(Key.begin(), Key.end());
3731-
3732-
// Emit the finished record.
3733-
Stream.EmitRecord(bitc::MST_CODE_ENTRY, Vals, AbbrevToUse);
3734-
3735-
// Emit an optional hash for the module now
3736-
const auto &Hash = Value.second;
3737-
if (llvm::any_of(Hash, [](uint32_t H) { return H; })) {
3738-
Vals.assign(Hash.begin(), Hash.end());
3739-
// Emit the hash record.
3740-
Stream.EmitRecord(bitc::MST_CODE_HASH, Vals, AbbrevHash);
3741-
}
3730+
forEachModule([&](const StringMapEntry<ModuleHash> &MPSE) {
3731+
StringRef Key = MPSE.getKey();
3732+
const auto &Hash = MPSE.getValue();
3733+
StringEncoding Bits = getStringEncoding(Key);
3734+
unsigned AbbrevToUse = Abbrev8Bit;
3735+
if (Bits == SE_Char6)
3736+
AbbrevToUse = Abbrev6Bit;
3737+
else if (Bits == SE_Fixed7)
3738+
AbbrevToUse = Abbrev7Bit;
37423739

3743-
Vals.clear();
3744-
});
3740+
auto ModuleId = ModuleIdMap.size();
3741+
ModuleIdMap[Key] = ModuleId;
3742+
Vals.push_back(ModuleId);
3743+
Vals.append(Key.begin(), Key.end());
3744+
3745+
// Emit the finished record.
3746+
Stream.EmitRecord(bitc::MST_CODE_ENTRY, Vals, AbbrevToUse);
3747+
3748+
// Emit an optional hash for the module now
3749+
if (llvm::any_of(Hash, [](uint32_t H) { return H; })) {
3750+
Vals.assign(Hash.begin(), Hash.end());
3751+
// Emit the hash record.
3752+
Stream.EmitRecord(bitc::MST_CODE_HASH, Vals, AbbrevHash);
3753+
}
3754+
3755+
Vals.clear();
3756+
});
37453757
Stream.ExitBlock();
37463758
}
37473759

@@ -4410,7 +4422,8 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
44104422

44114423
if (auto *VS = dyn_cast<GlobalVarSummary>(S)) {
44124424
NameVals.push_back(*ValueId);
4413-
NameVals.push_back(Index.getModuleId(VS->modulePath()));
4425+
assert(ModuleIdMap.count(VS->modulePath()));
4426+
NameVals.push_back(ModuleIdMap[VS->modulePath()]);
44144427
NameVals.push_back(getEncodedGVSummaryFlags(VS->flags()));
44154428
NameVals.push_back(getEncodedGVarFlags(VS->varflags()));
44164429
for (auto &RI : VS->refs()) {
@@ -4460,7 +4473,8 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
44604473
});
44614474

44624475
NameVals.push_back(*ValueId);
4463-
NameVals.push_back(Index.getModuleId(FS->modulePath()));
4476+
assert(ModuleIdMap.count(FS->modulePath()));
4477+
NameVals.push_back(ModuleIdMap[FS->modulePath()]);
44644478
NameVals.push_back(getEncodedGVSummaryFlags(FS->flags()));
44654479
NameVals.push_back(FS->instCount());
44664480
NameVals.push_back(getEncodedFFlags(FS->fflags()));
@@ -4520,7 +4534,8 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
45204534
auto AliasValueId = SummaryToValueIdMap[AS];
45214535
assert(AliasValueId);
45224536
NameVals.push_back(AliasValueId);
4523-
NameVals.push_back(Index.getModuleId(AS->modulePath()));
4537+
assert(ModuleIdMap.count(AS->modulePath()));
4538+
NameVals.push_back(ModuleIdMap[AS->modulePath()]);
45244539
NameVals.push_back(getEncodedGVSummaryFlags(AS->flags()));
45254540
auto AliaseeValueId = SummaryToValueIdMap[&AS->getAliasee()];
45264541
assert(AliaseeValueId);

llvm/lib/IR/AsmWriter.cpp

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1069,12 +1069,13 @@ int SlotTracker::processIndex() {
10691069

10701070
// The first block of slots are just the module ids, which start at 0 and are
10711071
// assigned consecutively. Since the StringMap iteration order isn't
1072-
// guaranteed, use a std::map to order by module ID before assigning slots.
1073-
std::map<uint64_t, StringRef> ModuleIdToPathMap;
1074-
for (auto &[ModPath, ModId] : TheIndex->modulePaths())
1075-
ModuleIdToPathMap[ModId.first] = ModPath;
1076-
for (auto &ModPair : ModuleIdToPathMap)
1077-
CreateModulePathSlot(ModPair.second);
1072+
// guaranteed, order by path string before assigning slots.
1073+
std::vector<StringRef> ModulePaths;
1074+
for (auto &[ModPath, _] : TheIndex->modulePaths())
1075+
ModulePaths.push_back(ModPath);
1076+
llvm::sort(ModulePaths.begin(), ModulePaths.end());
1077+
for (auto &ModPath : ModulePaths)
1078+
CreateModulePathSlot(ModPath);
10781079

10791080
// Start numbering the GUIDs after the module ids.
10801081
GUIDNext = ModulePathNext;
@@ -2890,12 +2891,11 @@ void AssemblyWriter::printModuleSummaryIndex() {
28902891
std::string RegularLTOModuleName =
28912892
ModuleSummaryIndex::getRegularLTOModuleName();
28922893
moduleVec.resize(TheIndex->modulePaths().size());
2893-
for (auto &[ModPath, ModId] : TheIndex->modulePaths())
2894+
for (auto &[ModPath, ModHash] : TheIndex->modulePaths())
28942895
moduleVec[Machine.getModulePathSlot(ModPath)] = std::make_pair(
2895-
// A module id of -1 is a special entry for a regular LTO module created
2896-
// during the thin link.
2897-
ModId.first == -1u ? RegularLTOModuleName : std::string(ModPath),
2898-
ModId.second);
2896+
// An empty module path is a special entry for a regular LTO module
2897+
// created during the thin link.
2898+
ModPath.empty() ? RegularLTOModuleName : std::string(ModPath), ModHash);
28992899

29002900
unsigned i = 0;
29012901
for (auto &ModPair : moduleVec) {

llvm/lib/IR/ModuleSummaryIndex.cpp

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -554,6 +554,17 @@ void ModuleSummaryIndex::exportToDot(
554554
std::map<StringRef, GVSOrderedMapTy> ModuleToDefinedGVS;
555555
collectDefinedGVSummariesPerModule(ModuleToDefinedGVS);
556556

557+
// Assign an id to each module path for use in graph labels. Since the
558+
// StringMap iteration order isn't guaranteed, order by path string before
559+
// assigning ids.
560+
std::vector<StringRef> ModulePaths;
561+
for (auto &[ModPath, _] : modulePaths())
562+
ModulePaths.push_back(ModPath);
563+
llvm::sort(ModulePaths);
564+
DenseMap<StringRef, uint64_t> ModuleIdMap;
565+
for (auto &ModPath : ModulePaths)
566+
ModuleIdMap.try_emplace(ModPath, ModuleIdMap.size());
567+
557568
// Get node identifier in form MXXX_<GUID>. The MXXX prefix is required,
558569
// because we may have multiple linkonce functions summaries.
559570
auto NodeId = [](uint64_t ModId, GlobalValue::GUID Id) {
@@ -589,7 +600,10 @@ void ModuleSummaryIndex::exportToDot(
589600

590601
OS << "digraph Summary {\n";
591602
for (auto &ModIt : ModuleToDefinedGVS) {
592-
auto ModId = getModuleId(ModIt.first);
603+
// Will be empty for a just built per-module index, which doesn't setup a
604+
// module paths table. In that case use 0 as the module id.
605+
assert(ModuleIdMap.count(ModIt.first) || ModuleIdMap.empty());
606+
auto ModId = ModuleIdMap.empty() ? 0 : ModuleIdMap[ModIt.first];
593607
OS << " // Module: " << ModIt.first << "\n";
594608
OS << " subgraph cluster_" << std::to_string(ModId) << " {\n";
595609
OS << " style = filled;\n";

llvm/lib/IRPrinter/IRPrintingPasses.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ PreservedAnalyses PrintModulePass::run(Module &M, ModuleAnalysisManager &AM) {
5353
: nullptr;
5454
if (Index) {
5555
if (Index->modulePaths().empty())
56-
Index->addModule("", 0);
56+
Index->addModule("");
5757
Index->print(OS);
5858
}
5959

0 commit comments

Comments
 (0)