-
Notifications
You must be signed in to change notification settings - Fork 14.5k
MC: Store fragment content and fixups out-of-line #146307
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
MC: Store fragment content and fixups out-of-line #146307
Conversation
Created using spr 1.3.5-bogner
@llvm/pr-subscribers-backend-x86 @llvm/pr-subscribers-backend-risc-v Author: Fangrui Song (MaskRay) ChangesMoved For appending content to the current section, use Moving only Contents out-of-line caused a slight performance regression Now using a plain SmallVector in MCSection for storage, with potential Co-authored-by: Alexis Engelke <engelke@in.tum.de> Patch is 37.67 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/146307.diff 13 Files Affected:
diff --git a/llvm/include/llvm/MC/MCFragment.h b/llvm/include/llvm/MC/MCFragment.h
index 2affa85c744ba..789f9a0e602b2 100644
--- a/llvm/include/llvm/MC/MCFragment.h
+++ b/llvm/include/llvm/MC/MCFragment.h
@@ -30,6 +30,9 @@ class MCSection;
class MCSubtargetInfo;
class MCSymbol;
+// Represents a contiguous segment of code or data within a section. Its size is
+// determined by MCAssembler::layout. All subclasses (except
+// MCRelaxableFragment, which stores a MCInst) must have trivial destructors.
class MCFragment {
friend class MCAssembler;
friend class MCObjectStreamer;
@@ -86,12 +89,6 @@ class MCFragment {
MCFragment(const MCFragment &) = delete;
MCFragment &operator=(const MCFragment &) = delete;
- /// Destroys the current fragment.
- ///
- /// This must be used instead of delete as MCFragment is non-virtual.
- /// This method will dispatch to the appropriate subclass.
- LLVM_ABI void destroy();
-
MCFragment *getNext() const { return Next; }
FragmentType getKind() const { return Kind; }
@@ -113,10 +110,7 @@ class MCFragment {
/// Interface implemented by fragments that contain encoded instructions and/or
/// data.
-///
class MCEncodedFragment : public MCFragment {
- uint8_t BundlePadding = 0;
-
protected:
MCEncodedFragment(MCFragment::FragmentType FType, bool HasInstructions)
: MCFragment(FType, HasInstructions) {}
@@ -125,6 +119,13 @@ class MCEncodedFragment : public MCFragment {
/// It must be non-null for instructions.
const MCSubtargetInfo *STI = nullptr;
+private:
+ uint32_t ContentStart = 0;
+ uint32_t ContentSize = 0;
+ uint32_t FixupStart = 0;
+ uint32_t FixupSize = 0;
+ uint8_t BundlePadding = 0;
+
public:
static bool classof(const MCFragment *F) {
MCFragment::FragmentType Kind = F->getKind();
@@ -136,6 +137,7 @@ class MCEncodedFragment : public MCFragment {
case MCFragment::FT_Dwarf:
case MCFragment::FT_DwarfFrame:
case MCFragment::FT_PseudoProbe:
+ case MCFragment::FT_CVInlineLines:
return true;
}
}
@@ -165,48 +167,33 @@ class MCEncodedFragment : public MCFragment {
HasInstructions = true;
this->STI = &STI;
}
-};
-/// Interface implemented by fragments that contain encoded instructions and/or
-/// data and also have fixups registered.
-///
-template <unsigned ContentsSize, unsigned FixupsSize>
-class MCEncodedFragmentWithFixups : public MCEncodedFragment {
- SmallVector<char, ContentsSize> Contents;
-
- /// The list of fixups in this fragment.
- SmallVector<MCFixup, FixupsSize> Fixups;
-
-protected:
- MCEncodedFragmentWithFixups(MCFragment::FragmentType FType,
- bool HasInstructions)
- : MCEncodedFragment(FType, HasInstructions) {}
-
-public:
- SmallVectorImpl<char> &getContents() { return Contents; }
- const SmallVectorImpl<char> &getContents() const { return Contents; }
-
- void appendContents(ArrayRef<char> C) { Contents.append(C.begin(), C.end()); }
- void appendContents(size_t Num, char Elt) { Contents.append(Num, Elt); }
- void setContents(ArrayRef<char> C) { Contents.assign(C.begin(), C.end()); }
-
- void addFixup(MCFixup Fixup) { Fixups.push_back(Fixup); }
- SmallVectorImpl<MCFixup> &getFixups() { return Fixups; }
- const SmallVectorImpl<MCFixup> &getFixups() const { return Fixups; }
-
- static bool classof(const MCFragment *F) {
- MCFragment::FragmentType Kind = F->getKind();
- return Kind == MCFragment::FT_Relaxable || Kind == MCFragment::FT_Data ||
- Kind == MCFragment::FT_CVDefRange || Kind == MCFragment::FT_Dwarf ||
- Kind == MCFragment::FT_DwarfFrame;
- }
+ // Content-related functions manage parent's storage using ContentStart and
+ // ContentSize.
+ void clearContents() { ContentSize = 0; }
+ SmallVectorImpl<char> &getContentsForAppending();
+ void doneAppending();
+ void appendContents(ArrayRef<char> Contents);
+ void appendContents(size_t Num, char Elt);
+ void setContents(ArrayRef<char> Contents);
+ MutableArrayRef<char> getContents();
+ ArrayRef<char> getContents() const;
+
+ // Fixup-related functions manage parent's storage using FixupStart and
+ // FixupSize.
+ void clearFixups() { FixupSize = 0; }
+ void addFixup(MCFixup Fixup);
+ void appendFixups(ArrayRef<MCFixup> Fixups);
+ void setFixups(ArrayRef<MCFixup> Fixups);
+ MutableArrayRef<MCFixup> getFixups();
+ ArrayRef<MCFixup> getFixups() const;
};
/// Fragment for data and encoded instructions.
///
-class MCDataFragment : public MCEncodedFragmentWithFixups<32, 4> {
+class MCDataFragment : public MCEncodedFragment {
public:
- MCDataFragment() : MCEncodedFragmentWithFixups<32, 4>(FT_Data, false) {}
+ MCDataFragment() : MCEncodedFragment(FT_Data, false) {}
static bool classof(const MCFragment *F) {
return F->getKind() == MCFragment::FT_Data;
@@ -219,13 +206,13 @@ class MCDataFragment : public MCEncodedFragmentWithFixups<32, 4> {
/// A relaxable fragment holds on to its MCInst, since it may need to be
/// relaxed during the assembler layout and relaxation stage.
///
-class MCRelaxableFragment : public MCEncodedFragmentWithFixups<8, 1> {
+class MCRelaxableFragment : public MCEncodedFragment {
/// The instruction this is a fragment for.
MCInst Inst;
public:
MCRelaxableFragment(const MCInst &Inst, const MCSubtargetInfo &STI)
- : MCEncodedFragmentWithFixups(FT_Relaxable, true), Inst(Inst) {
+ : MCEncodedFragment(FT_Relaxable, true), Inst(Inst) {
this->STI = &STI;
}
@@ -374,7 +361,7 @@ class MCOrgFragment : public MCFragment {
}
};
-class MCLEBFragment final : public MCEncodedFragmentWithFixups<8, 0> {
+class MCLEBFragment final : public MCEncodedFragment {
/// True if this is a sleb128, false if uleb128.
bool IsSigned;
@@ -383,24 +370,19 @@ class MCLEBFragment final : public MCEncodedFragmentWithFixups<8, 0> {
public:
MCLEBFragment(const MCExpr &Value, bool IsSigned)
- : MCEncodedFragmentWithFixups<8, 0>(FT_LEB, false), IsSigned(IsSigned),
- Value(&Value) {
- getContents().push_back(0);
- }
+ : MCEncodedFragment(FT_LEB, false), IsSigned(IsSigned), Value(&Value) {}
const MCExpr &getValue() const { return *Value; }
void setValue(const MCExpr *Expr) { Value = Expr; }
bool isSigned() const { return IsSigned; }
- /// @}
-
static bool classof(const MCFragment *F) {
return F->getKind() == MCFragment::FT_LEB;
}
};
-class MCDwarfLineAddrFragment : public MCEncodedFragmentWithFixups<8, 1> {
+class MCDwarfLineAddrFragment : public MCEncodedFragment {
/// The value of the difference between the two line numbers
/// between two .loc dwarf directives.
int64_t LineDelta;
@@ -411,8 +393,8 @@ class MCDwarfLineAddrFragment : public MCEncodedFragmentWithFixups<8, 1> {
public:
MCDwarfLineAddrFragment(int64_t LineDelta, const MCExpr &AddrDelta)
- : MCEncodedFragmentWithFixups<8, 1>(FT_Dwarf, false),
- LineDelta(LineDelta), AddrDelta(&AddrDelta) {}
+ : MCEncodedFragment(FT_Dwarf, false), LineDelta(LineDelta),
+ AddrDelta(&AddrDelta) {}
int64_t getLineDelta() const { return LineDelta; }
@@ -423,15 +405,14 @@ class MCDwarfLineAddrFragment : public MCEncodedFragmentWithFixups<8, 1> {
}
};
-class MCDwarfCallFrameFragment : public MCEncodedFragmentWithFixups<8, 1> {
+class MCDwarfCallFrameFragment : public MCEncodedFragment {
/// The expression for the difference of the two symbols that
/// make up the address delta between two .cfi_* dwarf directives.
const MCExpr *AddrDelta;
public:
MCDwarfCallFrameFragment(const MCExpr &AddrDelta)
- : MCEncodedFragmentWithFixups<8, 1>(FT_DwarfFrame, false),
- AddrDelta(&AddrDelta) {}
+ : MCEncodedFragment(FT_DwarfFrame, false), AddrDelta(&AddrDelta) {}
const MCExpr &getAddrDelta() const { return *AddrDelta; }
void setAddrDelta(const MCExpr *E) { AddrDelta = E; }
@@ -459,13 +440,12 @@ class MCSymbolIdFragment : public MCFragment {
/// Fragment representing the binary annotations produced by the
/// .cv_inline_linetable directive.
-class MCCVInlineLineTableFragment : public MCFragment {
+class MCCVInlineLineTableFragment : public MCEncodedFragment {
unsigned SiteFuncId;
unsigned StartFileId;
unsigned StartLineNum;
const MCSymbol *FnStartSym;
const MCSymbol *FnEndSym;
- SmallString<8> Contents;
/// CodeViewContext has the real knowledge about this format, so let it access
/// our members.
@@ -475,23 +455,20 @@ class MCCVInlineLineTableFragment : public MCFragment {
MCCVInlineLineTableFragment(unsigned SiteFuncId, unsigned StartFileId,
unsigned StartLineNum, const MCSymbol *FnStartSym,
const MCSymbol *FnEndSym)
- : MCFragment(FT_CVInlineLines, false), SiteFuncId(SiteFuncId),
+ : MCEncodedFragment(FT_CVInlineLines, false), SiteFuncId(SiteFuncId),
StartFileId(StartFileId), StartLineNum(StartLineNum),
FnStartSym(FnStartSym), FnEndSym(FnEndSym) {}
const MCSymbol *getFnStartSym() const { return FnStartSym; }
const MCSymbol *getFnEndSym() const { return FnEndSym; }
- SmallString<8> &getContents() { return Contents; }
- const SmallString<8> &getContents() const { return Contents; }
-
static bool classof(const MCFragment *F) {
return F->getKind() == MCFragment::FT_CVInlineLines;
}
};
/// Fragment representing the .cv_def_range directive.
-class MCCVDefRangeFragment : public MCEncodedFragmentWithFixups<32, 4> {
+class MCCVDefRangeFragment : public MCEncodedFragment {
ArrayRef<std::pair<const MCSymbol *, const MCSymbol *>> Ranges;
StringRef FixedSizePortion;
@@ -503,8 +480,9 @@ class MCCVDefRangeFragment : public MCEncodedFragmentWithFixups<32, 4> {
MCCVDefRangeFragment(
ArrayRef<std::pair<const MCSymbol *, const MCSymbol *>> Ranges,
StringRef FixedSizePortion)
- : MCEncodedFragmentWithFixups<32, 4>(FT_CVDefRange, false),
- Ranges(Ranges), FixedSizePortion(FixedSizePortion) {}
+ : MCEncodedFragment(FT_CVDefRange, false),
+ Ranges(Ranges.begin(), Ranges.end()),
+ FixedSizePortion(FixedSizePortion) {}
ArrayRef<std::pair<const MCSymbol *, const MCSymbol *>> getRanges() const {
return Ranges;
@@ -556,15 +534,14 @@ class MCBoundaryAlignFragment : public MCFragment {
}
};
-class MCPseudoProbeAddrFragment : public MCEncodedFragmentWithFixups<8, 1> {
+class MCPseudoProbeAddrFragment : public MCEncodedFragment {
/// The expression for the difference of the two symbols that
/// make up the address delta between two .pseudoprobe directives.
const MCExpr *AddrDelta;
public:
MCPseudoProbeAddrFragment(const MCExpr *AddrDelta)
- : MCEncodedFragmentWithFixups<8, 1>(FT_PseudoProbe, false),
- AddrDelta(AddrDelta) {}
+ : MCEncodedFragment(FT_PseudoProbe, false), AddrDelta(AddrDelta) {}
const MCExpr &getAddrDelta() const { return *AddrDelta; }
diff --git a/llvm/include/llvm/MC/MCSection.h b/llvm/include/llvm/MC/MCSection.h
index a4e391dfb03ea..fb59c74b7c484 100644
--- a/llvm/include/llvm/MC/MCSection.h
+++ b/llvm/include/llvm/MC/MCSection.h
@@ -39,6 +39,7 @@ class LLVM_ABI MCSection {
public:
friend MCAssembler;
friend MCObjectStreamer;
+ friend class MCEncodedFragment;
static constexpr unsigned NonUniqueID = ~0U;
enum SectionVariant {
@@ -116,6 +117,10 @@ class LLVM_ABI MCSection {
// subsections.
SmallVector<std::pair<unsigned, FragList>, 1> Subsections;
+ // Content and fixup storage for fragments
+ SmallVector<char, 0> ContentStorage;
+ SmallVector<MCFixup, 0> FixupStorage;
+
protected:
// TODO Make Name private when possible.
StringRef Name;
diff --git a/llvm/lib/MC/MCAssembler.cpp b/llvm/lib/MC/MCAssembler.cpp
index bd2242af23f7c..f3ad5b6d71b80 100644
--- a/llvm/lib/MC/MCAssembler.cpp
+++ b/llvm/lib/MC/MCAssembler.cpp
@@ -607,12 +607,14 @@ static void writeFragment(raw_ostream &OS, const MCAssembler &Asm,
case MCFragment::FT_Data:
++stats::EmittedDataFragments;
- OS << cast<MCDataFragment>(F).getContents();
+ OS << StringRef(cast<MCDataFragment>(F).getContents().data(),
+ cast<MCDataFragment>(F).getContents().size());
break;
case MCFragment::FT_Relaxable:
++stats::EmittedRelaxableFragments;
- OS << cast<MCRelaxableFragment>(F).getContents();
+ OS << StringRef(cast<MCRelaxableFragment>(F).getContents().data(),
+ cast<MCRelaxableFragment>(F).getContents().size());
break;
case MCFragment::FT_Fill: {
@@ -692,7 +694,7 @@ static void writeFragment(raw_ostream &OS, const MCAssembler &Asm,
case MCFragment::FT_LEB: {
const MCLEBFragment &LF = cast<MCLEBFragment>(F);
- OS << LF.getContents();
+ OS << StringRef(LF.getContents().data(), LF.getContents().size());
break;
}
@@ -722,27 +724,27 @@ static void writeFragment(raw_ostream &OS, const MCAssembler &Asm,
case MCFragment::FT_Dwarf: {
const MCDwarfLineAddrFragment &OF = cast<MCDwarfLineAddrFragment>(F);
- OS << OF.getContents();
+ OS << StringRef(OF.getContents().data(), OF.getContents().size());
break;
}
case MCFragment::FT_DwarfFrame: {
const MCDwarfCallFrameFragment &CF = cast<MCDwarfCallFrameFragment>(F);
- OS << CF.getContents();
+ OS << StringRef(CF.getContents().data(), CF.getContents().size());
break;
}
case MCFragment::FT_CVInlineLines: {
const auto &OF = cast<MCCVInlineLineTableFragment>(F);
- OS << OF.getContents();
+ OS << StringRef(OF.getContents().data(), OF.getContents().size());
break;
}
case MCFragment::FT_CVDefRange: {
const auto &DRF = cast<MCCVDefRangeFragment>(F);
- OS << DRF.getContents();
+ OS << StringRef(DRF.getContents().data(), DRF.getContents().size());
break;
}
case MCFragment::FT_PseudoProbe: {
const MCPseudoProbeAddrFragment &PF = cast<MCPseudoProbeAddrFragment>(F);
- OS << PF.getContents();
+ OS << StringRef(PF.getContents().data(), PF.getContents().size());
break;
}
}
@@ -994,10 +996,11 @@ bool MCAssembler::relaxInstruction(MCRelaxableFragment &F) {
// Encode the new instruction.
F.setInst(Relaxed);
- F.getFixups().clear();
- F.getContents().clear();
- getEmitter().encodeInstruction(Relaxed, F.getContents(), F.getFixups(),
- *F.getSubtargetInfo());
+ SmallVector<char, 16> Data;
+ SmallVector<MCFixup, 1> Fixups;
+ getEmitter().encodeInstruction(Relaxed, Data, Fixups, *F.getSubtargetInfo());
+ F.setContents(Data);
+ F.setFixups(Fixups);
return true;
}
@@ -1005,8 +1008,7 @@ bool MCAssembler::relaxLEB(MCLEBFragment &LF) {
const unsigned OldSize = static_cast<unsigned>(LF.getContents().size());
unsigned PadTo = OldSize;
int64_t Value;
- SmallVectorImpl<char> &Data = LF.getContents();
- LF.getFixups().clear();
+ LF.clearFixups();
// Use evaluateKnownAbsolute for Mach-O as a hack: .subsections_via_symbols
// requires that .uleb128 A-B is foldable where A and B reside in different
// fragments. This is used by __gcc_except_table.
@@ -1027,7 +1029,7 @@ bool MCAssembler::relaxLEB(MCLEBFragment &LF) {
if (UseZeroPad)
Value = 0;
}
- Data.clear();
+ SmallVector<char, 16> Data;
raw_svector_ostream OSE(Data);
// The compiler can generate EH table assembly that is impossible to assemble
// without either adding padding to an LEB fragment or adding extra padding
@@ -1037,7 +1039,8 @@ bool MCAssembler::relaxLEB(MCLEBFragment &LF) {
encodeSLEB128(Value, OSE, PadTo);
else
encodeULEB128(Value, OSE, PadTo);
- return OldSize != LF.getContents().size();
+ LF.setContents(Data);
+ return OldSize != Data.size();
}
/// Check if the branch crosses the boundary.
@@ -1107,19 +1110,19 @@ bool MCAssembler::relaxDwarfLineAddr(MCDwarfLineAddrFragment &DF) {
return WasRelaxed;
MCContext &Context = getContext();
- uint64_t OldSize = DF.getContents().size();
+ auto OldSize = DF.getContents().size();
int64_t AddrDelta;
bool Abs = DF.getAddrDelta().evaluateKnownAbsolute(AddrDelta, *this);
assert(Abs && "We created a line delta with an invalid expression");
(void)Abs;
int64_t LineDelta;
LineDelta = DF.getLineDelta();
- SmallVectorImpl<char> &Data = DF.getContents();
- Data.clear();
- DF.getFixups().clear();
+ SmallVector<char, 8> Data;
MCDwarfLineAddr::encode(Context, getDWARFLinetableParams(), LineDelta,
AddrDelta, Data);
+ DF.setContents(Data);
+ DF.clearFixups();
return OldSize != Data.size();
}
@@ -1138,12 +1141,11 @@ bool MCAssembler::relaxDwarfCallFrameFragment(MCDwarfCallFrameFragment &DF) {
return false;
}
- SmallVectorImpl<char> &Data = DF.getContents();
- uint64_t OldSize = Data.size();
- Data.clear();
- DF.getFixups().clear();
-
+ auto OldSize = DF.getContents().size();
+ SmallVector<char, 8> Data;
MCDwarfFrameEmitter::encodeAdvanceLoc(Context, Value, Data);
+ DF.setContents(Data);
+ DF.clearFixups();
return OldSize != Data.size();
}
@@ -1173,13 +1175,13 @@ bool MCAssembler::relaxPseudoProbeAddr(MCPseudoProbeAddrFragment &PF) {
bool Abs = PF.getAddrDelta().evaluateKnownAbsolute(AddrDelta, *this);
assert(Abs && "We created a pseudo probe with an invalid expression");
(void)Abs;
- SmallVectorImpl<char> &Data = PF.getContents();
- Data.clear();
+ SmallVector<char, 8> Data;
raw_svector_ostream OSE(Data);
- PF.getFixups().clear();
// AddrDelta is a signed integer
encodeSLEB128(AddrDelta, OSE, OldSize);
+ PF.setContents(Data);
+ PF.clearFixups();
return OldSize != Data.size();
}
diff --git a/llvm/lib/MC/MCCodeView.cpp b/llvm/lib/MC/MCCodeView.cpp
index e05374783d2b4..7a50e5c046147 100644
--- a/llvm/lib/MC/MCCodeView.cpp
+++ b/llvm/lib/MC/MCCodeView.cpp
@@ -512,7 +512,7 @@ void CodeViewContext::encodeInlineLineTable(const MCAssembler &Asm,
MCCVFunctionInfo *SiteInfo = getCVFunctionInfo(Frag.SiteFuncId);
- SmallVectorImpl<char> &Buffer = Frag.getContents();
+ SmallVector<char, 0> Buffer;
Buffer.clear(); // Clear old contents if we went through relaxation.
for (const MCCVLoc &Loc : Locs) {
// Exit early if our line table would produce an oversized InlineSiteSym
@@ -606,15 +606,14 @@ void CodeViewContext::encodeInlineLineTable(const MCAssembler &Asm,
compressAnnotation(BinaryAnnotationsOpCode::ChangeCodeLength, Buffer);
compressAnnotation(std::min(EndSymLength, LocAfterLength), Buffer);
+ Frag.setContents(Buffer);
}
void CodeViewContext::encodeDefRange(const MCAssembler &Asm,
MCCVDefRangeFragment &Frag) {
MCContext &Ctx = Asm.getContext();
- SmallVectorImpl<char> &Contents = Frag.getContents();
- Contents.clear();
- SmallVectorImpl<MCFixup> &Fixups = Frag.getFixups();
- Fixups.clear();
+ SmallVector<char, 0> Contents;
+ SmallVector<MCFixup, 0> Fixups;
raw_svector_ostream OS(Contents);
// Compute all the sizes up front.
@@ -694,4 +693,7 @@ void CodeViewContext::encodeDefRange(const MCAssembler &Asm,
GapStartOffset += GapSize + RangeSize;
}
}
+
+ Frag.setContents(Contents);
+ Frag.setFixups(Fixups);
}
diff --git a/llvm/lib/MC/MCELFStreamer.cpp b/llvm/lib/MC/MCELFStreamer.cpp
index 5a514f12ce549..41be72ebeb9ab 100644
--- a/llvm/lib/MC/MCELFStreamer.cpp
+++ b/llvm/lib/MC/MCELFStreamer.cpp
@@ -449,11 +449,13 @@ void MCELFStreamer::emitInstToData(const MCInst &Inst,
// Emit instruction directly into data fragment.
size_t FixupStartIndex = DF->getFixups().size();
size_t CodeOffset = DF->getContents().size();
- Assembler.getEmitter().encodeInstruction(Inst, DF->getContents(),
- DF->getFixups(), STI);
+ SmallVector<MCFixup, 1> Fixups;
+ Assembler.getEmitter().encodeInstruction(Inst, DF->getContentsForAppending(),
+ Fixups, STI);
+ DF->doneAppending();
+ DF->appendFixups(Fixups);
- auto Fixups = MutableArrayRef(DF->getFixups()).slice(FixupStartIndex);
- for (auto &Fixup : Fixups) {
+ for (auto &Fixup : MutableArrayRef(DF->getFixups()).slice(FixupStartIndex)) {
Fixup.setOffset(Fixup.getOffset() + CodeOffset);
if (Fixup.isLinkerRelaxable()) {
DF->setLinkerRelaxable();
diff --git a/llvm/lib/MC/MCFragment.cpp b/llvm/lib/MC/MCFragment.cpp
index d8db55c9a5f5d..859ee73447aad 100644
--- a/llvm/lib/MC/MCFrag...
[truncated]
|
@llvm/pr-subscribers-mc Author: Fangrui Song (MaskRay) ChangesMoved For appending content to the current section, use Moving only Contents out-of-line caused a slight performance regression Now using a plain SmallVector in MCSection for storage, with potential Co-authored-by: Alexis Engelke <engelke@in.tum.de> Patch is 37.67 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/146307.diff 13 Files Affected:
diff --git a/llvm/include/llvm/MC/MCFragment.h b/llvm/include/llvm/MC/MCFragment.h
index 2affa85c744ba..789f9a0e602b2 100644
--- a/llvm/include/llvm/MC/MCFragment.h
+++ b/llvm/include/llvm/MC/MCFragment.h
@@ -30,6 +30,9 @@ class MCSection;
class MCSubtargetInfo;
class MCSymbol;
+// Represents a contiguous segment of code or data within a section. Its size is
+// determined by MCAssembler::layout. All subclasses (except
+// MCRelaxableFragment, which stores a MCInst) must have trivial destructors.
class MCFragment {
friend class MCAssembler;
friend class MCObjectStreamer;
@@ -86,12 +89,6 @@ class MCFragment {
MCFragment(const MCFragment &) = delete;
MCFragment &operator=(const MCFragment &) = delete;
- /// Destroys the current fragment.
- ///
- /// This must be used instead of delete as MCFragment is non-virtual.
- /// This method will dispatch to the appropriate subclass.
- LLVM_ABI void destroy();
-
MCFragment *getNext() const { return Next; }
FragmentType getKind() const { return Kind; }
@@ -113,10 +110,7 @@ class MCFragment {
/// Interface implemented by fragments that contain encoded instructions and/or
/// data.
-///
class MCEncodedFragment : public MCFragment {
- uint8_t BundlePadding = 0;
-
protected:
MCEncodedFragment(MCFragment::FragmentType FType, bool HasInstructions)
: MCFragment(FType, HasInstructions) {}
@@ -125,6 +119,13 @@ class MCEncodedFragment : public MCFragment {
/// It must be non-null for instructions.
const MCSubtargetInfo *STI = nullptr;
+private:
+ uint32_t ContentStart = 0;
+ uint32_t ContentSize = 0;
+ uint32_t FixupStart = 0;
+ uint32_t FixupSize = 0;
+ uint8_t BundlePadding = 0;
+
public:
static bool classof(const MCFragment *F) {
MCFragment::FragmentType Kind = F->getKind();
@@ -136,6 +137,7 @@ class MCEncodedFragment : public MCFragment {
case MCFragment::FT_Dwarf:
case MCFragment::FT_DwarfFrame:
case MCFragment::FT_PseudoProbe:
+ case MCFragment::FT_CVInlineLines:
return true;
}
}
@@ -165,48 +167,33 @@ class MCEncodedFragment : public MCFragment {
HasInstructions = true;
this->STI = &STI;
}
-};
-/// Interface implemented by fragments that contain encoded instructions and/or
-/// data and also have fixups registered.
-///
-template <unsigned ContentsSize, unsigned FixupsSize>
-class MCEncodedFragmentWithFixups : public MCEncodedFragment {
- SmallVector<char, ContentsSize> Contents;
-
- /// The list of fixups in this fragment.
- SmallVector<MCFixup, FixupsSize> Fixups;
-
-protected:
- MCEncodedFragmentWithFixups(MCFragment::FragmentType FType,
- bool HasInstructions)
- : MCEncodedFragment(FType, HasInstructions) {}
-
-public:
- SmallVectorImpl<char> &getContents() { return Contents; }
- const SmallVectorImpl<char> &getContents() const { return Contents; }
-
- void appendContents(ArrayRef<char> C) { Contents.append(C.begin(), C.end()); }
- void appendContents(size_t Num, char Elt) { Contents.append(Num, Elt); }
- void setContents(ArrayRef<char> C) { Contents.assign(C.begin(), C.end()); }
-
- void addFixup(MCFixup Fixup) { Fixups.push_back(Fixup); }
- SmallVectorImpl<MCFixup> &getFixups() { return Fixups; }
- const SmallVectorImpl<MCFixup> &getFixups() const { return Fixups; }
-
- static bool classof(const MCFragment *F) {
- MCFragment::FragmentType Kind = F->getKind();
- return Kind == MCFragment::FT_Relaxable || Kind == MCFragment::FT_Data ||
- Kind == MCFragment::FT_CVDefRange || Kind == MCFragment::FT_Dwarf ||
- Kind == MCFragment::FT_DwarfFrame;
- }
+ // Content-related functions manage parent's storage using ContentStart and
+ // ContentSize.
+ void clearContents() { ContentSize = 0; }
+ SmallVectorImpl<char> &getContentsForAppending();
+ void doneAppending();
+ void appendContents(ArrayRef<char> Contents);
+ void appendContents(size_t Num, char Elt);
+ void setContents(ArrayRef<char> Contents);
+ MutableArrayRef<char> getContents();
+ ArrayRef<char> getContents() const;
+
+ // Fixup-related functions manage parent's storage using FixupStart and
+ // FixupSize.
+ void clearFixups() { FixupSize = 0; }
+ void addFixup(MCFixup Fixup);
+ void appendFixups(ArrayRef<MCFixup> Fixups);
+ void setFixups(ArrayRef<MCFixup> Fixups);
+ MutableArrayRef<MCFixup> getFixups();
+ ArrayRef<MCFixup> getFixups() const;
};
/// Fragment for data and encoded instructions.
///
-class MCDataFragment : public MCEncodedFragmentWithFixups<32, 4> {
+class MCDataFragment : public MCEncodedFragment {
public:
- MCDataFragment() : MCEncodedFragmentWithFixups<32, 4>(FT_Data, false) {}
+ MCDataFragment() : MCEncodedFragment(FT_Data, false) {}
static bool classof(const MCFragment *F) {
return F->getKind() == MCFragment::FT_Data;
@@ -219,13 +206,13 @@ class MCDataFragment : public MCEncodedFragmentWithFixups<32, 4> {
/// A relaxable fragment holds on to its MCInst, since it may need to be
/// relaxed during the assembler layout and relaxation stage.
///
-class MCRelaxableFragment : public MCEncodedFragmentWithFixups<8, 1> {
+class MCRelaxableFragment : public MCEncodedFragment {
/// The instruction this is a fragment for.
MCInst Inst;
public:
MCRelaxableFragment(const MCInst &Inst, const MCSubtargetInfo &STI)
- : MCEncodedFragmentWithFixups(FT_Relaxable, true), Inst(Inst) {
+ : MCEncodedFragment(FT_Relaxable, true), Inst(Inst) {
this->STI = &STI;
}
@@ -374,7 +361,7 @@ class MCOrgFragment : public MCFragment {
}
};
-class MCLEBFragment final : public MCEncodedFragmentWithFixups<8, 0> {
+class MCLEBFragment final : public MCEncodedFragment {
/// True if this is a sleb128, false if uleb128.
bool IsSigned;
@@ -383,24 +370,19 @@ class MCLEBFragment final : public MCEncodedFragmentWithFixups<8, 0> {
public:
MCLEBFragment(const MCExpr &Value, bool IsSigned)
- : MCEncodedFragmentWithFixups<8, 0>(FT_LEB, false), IsSigned(IsSigned),
- Value(&Value) {
- getContents().push_back(0);
- }
+ : MCEncodedFragment(FT_LEB, false), IsSigned(IsSigned), Value(&Value) {}
const MCExpr &getValue() const { return *Value; }
void setValue(const MCExpr *Expr) { Value = Expr; }
bool isSigned() const { return IsSigned; }
- /// @}
-
static bool classof(const MCFragment *F) {
return F->getKind() == MCFragment::FT_LEB;
}
};
-class MCDwarfLineAddrFragment : public MCEncodedFragmentWithFixups<8, 1> {
+class MCDwarfLineAddrFragment : public MCEncodedFragment {
/// The value of the difference between the two line numbers
/// between two .loc dwarf directives.
int64_t LineDelta;
@@ -411,8 +393,8 @@ class MCDwarfLineAddrFragment : public MCEncodedFragmentWithFixups<8, 1> {
public:
MCDwarfLineAddrFragment(int64_t LineDelta, const MCExpr &AddrDelta)
- : MCEncodedFragmentWithFixups<8, 1>(FT_Dwarf, false),
- LineDelta(LineDelta), AddrDelta(&AddrDelta) {}
+ : MCEncodedFragment(FT_Dwarf, false), LineDelta(LineDelta),
+ AddrDelta(&AddrDelta) {}
int64_t getLineDelta() const { return LineDelta; }
@@ -423,15 +405,14 @@ class MCDwarfLineAddrFragment : public MCEncodedFragmentWithFixups<8, 1> {
}
};
-class MCDwarfCallFrameFragment : public MCEncodedFragmentWithFixups<8, 1> {
+class MCDwarfCallFrameFragment : public MCEncodedFragment {
/// The expression for the difference of the two symbols that
/// make up the address delta between two .cfi_* dwarf directives.
const MCExpr *AddrDelta;
public:
MCDwarfCallFrameFragment(const MCExpr &AddrDelta)
- : MCEncodedFragmentWithFixups<8, 1>(FT_DwarfFrame, false),
- AddrDelta(&AddrDelta) {}
+ : MCEncodedFragment(FT_DwarfFrame, false), AddrDelta(&AddrDelta) {}
const MCExpr &getAddrDelta() const { return *AddrDelta; }
void setAddrDelta(const MCExpr *E) { AddrDelta = E; }
@@ -459,13 +440,12 @@ class MCSymbolIdFragment : public MCFragment {
/// Fragment representing the binary annotations produced by the
/// .cv_inline_linetable directive.
-class MCCVInlineLineTableFragment : public MCFragment {
+class MCCVInlineLineTableFragment : public MCEncodedFragment {
unsigned SiteFuncId;
unsigned StartFileId;
unsigned StartLineNum;
const MCSymbol *FnStartSym;
const MCSymbol *FnEndSym;
- SmallString<8> Contents;
/// CodeViewContext has the real knowledge about this format, so let it access
/// our members.
@@ -475,23 +455,20 @@ class MCCVInlineLineTableFragment : public MCFragment {
MCCVInlineLineTableFragment(unsigned SiteFuncId, unsigned StartFileId,
unsigned StartLineNum, const MCSymbol *FnStartSym,
const MCSymbol *FnEndSym)
- : MCFragment(FT_CVInlineLines, false), SiteFuncId(SiteFuncId),
+ : MCEncodedFragment(FT_CVInlineLines, false), SiteFuncId(SiteFuncId),
StartFileId(StartFileId), StartLineNum(StartLineNum),
FnStartSym(FnStartSym), FnEndSym(FnEndSym) {}
const MCSymbol *getFnStartSym() const { return FnStartSym; }
const MCSymbol *getFnEndSym() const { return FnEndSym; }
- SmallString<8> &getContents() { return Contents; }
- const SmallString<8> &getContents() const { return Contents; }
-
static bool classof(const MCFragment *F) {
return F->getKind() == MCFragment::FT_CVInlineLines;
}
};
/// Fragment representing the .cv_def_range directive.
-class MCCVDefRangeFragment : public MCEncodedFragmentWithFixups<32, 4> {
+class MCCVDefRangeFragment : public MCEncodedFragment {
ArrayRef<std::pair<const MCSymbol *, const MCSymbol *>> Ranges;
StringRef FixedSizePortion;
@@ -503,8 +480,9 @@ class MCCVDefRangeFragment : public MCEncodedFragmentWithFixups<32, 4> {
MCCVDefRangeFragment(
ArrayRef<std::pair<const MCSymbol *, const MCSymbol *>> Ranges,
StringRef FixedSizePortion)
- : MCEncodedFragmentWithFixups<32, 4>(FT_CVDefRange, false),
- Ranges(Ranges), FixedSizePortion(FixedSizePortion) {}
+ : MCEncodedFragment(FT_CVDefRange, false),
+ Ranges(Ranges.begin(), Ranges.end()),
+ FixedSizePortion(FixedSizePortion) {}
ArrayRef<std::pair<const MCSymbol *, const MCSymbol *>> getRanges() const {
return Ranges;
@@ -556,15 +534,14 @@ class MCBoundaryAlignFragment : public MCFragment {
}
};
-class MCPseudoProbeAddrFragment : public MCEncodedFragmentWithFixups<8, 1> {
+class MCPseudoProbeAddrFragment : public MCEncodedFragment {
/// The expression for the difference of the two symbols that
/// make up the address delta between two .pseudoprobe directives.
const MCExpr *AddrDelta;
public:
MCPseudoProbeAddrFragment(const MCExpr *AddrDelta)
- : MCEncodedFragmentWithFixups<8, 1>(FT_PseudoProbe, false),
- AddrDelta(AddrDelta) {}
+ : MCEncodedFragment(FT_PseudoProbe, false), AddrDelta(AddrDelta) {}
const MCExpr &getAddrDelta() const { return *AddrDelta; }
diff --git a/llvm/include/llvm/MC/MCSection.h b/llvm/include/llvm/MC/MCSection.h
index a4e391dfb03ea..fb59c74b7c484 100644
--- a/llvm/include/llvm/MC/MCSection.h
+++ b/llvm/include/llvm/MC/MCSection.h
@@ -39,6 +39,7 @@ class LLVM_ABI MCSection {
public:
friend MCAssembler;
friend MCObjectStreamer;
+ friend class MCEncodedFragment;
static constexpr unsigned NonUniqueID = ~0U;
enum SectionVariant {
@@ -116,6 +117,10 @@ class LLVM_ABI MCSection {
// subsections.
SmallVector<std::pair<unsigned, FragList>, 1> Subsections;
+ // Content and fixup storage for fragments
+ SmallVector<char, 0> ContentStorage;
+ SmallVector<MCFixup, 0> FixupStorage;
+
protected:
// TODO Make Name private when possible.
StringRef Name;
diff --git a/llvm/lib/MC/MCAssembler.cpp b/llvm/lib/MC/MCAssembler.cpp
index bd2242af23f7c..f3ad5b6d71b80 100644
--- a/llvm/lib/MC/MCAssembler.cpp
+++ b/llvm/lib/MC/MCAssembler.cpp
@@ -607,12 +607,14 @@ static void writeFragment(raw_ostream &OS, const MCAssembler &Asm,
case MCFragment::FT_Data:
++stats::EmittedDataFragments;
- OS << cast<MCDataFragment>(F).getContents();
+ OS << StringRef(cast<MCDataFragment>(F).getContents().data(),
+ cast<MCDataFragment>(F).getContents().size());
break;
case MCFragment::FT_Relaxable:
++stats::EmittedRelaxableFragments;
- OS << cast<MCRelaxableFragment>(F).getContents();
+ OS << StringRef(cast<MCRelaxableFragment>(F).getContents().data(),
+ cast<MCRelaxableFragment>(F).getContents().size());
break;
case MCFragment::FT_Fill: {
@@ -692,7 +694,7 @@ static void writeFragment(raw_ostream &OS, const MCAssembler &Asm,
case MCFragment::FT_LEB: {
const MCLEBFragment &LF = cast<MCLEBFragment>(F);
- OS << LF.getContents();
+ OS << StringRef(LF.getContents().data(), LF.getContents().size());
break;
}
@@ -722,27 +724,27 @@ static void writeFragment(raw_ostream &OS, const MCAssembler &Asm,
case MCFragment::FT_Dwarf: {
const MCDwarfLineAddrFragment &OF = cast<MCDwarfLineAddrFragment>(F);
- OS << OF.getContents();
+ OS << StringRef(OF.getContents().data(), OF.getContents().size());
break;
}
case MCFragment::FT_DwarfFrame: {
const MCDwarfCallFrameFragment &CF = cast<MCDwarfCallFrameFragment>(F);
- OS << CF.getContents();
+ OS << StringRef(CF.getContents().data(), CF.getContents().size());
break;
}
case MCFragment::FT_CVInlineLines: {
const auto &OF = cast<MCCVInlineLineTableFragment>(F);
- OS << OF.getContents();
+ OS << StringRef(OF.getContents().data(), OF.getContents().size());
break;
}
case MCFragment::FT_CVDefRange: {
const auto &DRF = cast<MCCVDefRangeFragment>(F);
- OS << DRF.getContents();
+ OS << StringRef(DRF.getContents().data(), DRF.getContents().size());
break;
}
case MCFragment::FT_PseudoProbe: {
const MCPseudoProbeAddrFragment &PF = cast<MCPseudoProbeAddrFragment>(F);
- OS << PF.getContents();
+ OS << StringRef(PF.getContents().data(), PF.getContents().size());
break;
}
}
@@ -994,10 +996,11 @@ bool MCAssembler::relaxInstruction(MCRelaxableFragment &F) {
// Encode the new instruction.
F.setInst(Relaxed);
- F.getFixups().clear();
- F.getContents().clear();
- getEmitter().encodeInstruction(Relaxed, F.getContents(), F.getFixups(),
- *F.getSubtargetInfo());
+ SmallVector<char, 16> Data;
+ SmallVector<MCFixup, 1> Fixups;
+ getEmitter().encodeInstruction(Relaxed, Data, Fixups, *F.getSubtargetInfo());
+ F.setContents(Data);
+ F.setFixups(Fixups);
return true;
}
@@ -1005,8 +1008,7 @@ bool MCAssembler::relaxLEB(MCLEBFragment &LF) {
const unsigned OldSize = static_cast<unsigned>(LF.getContents().size());
unsigned PadTo = OldSize;
int64_t Value;
- SmallVectorImpl<char> &Data = LF.getContents();
- LF.getFixups().clear();
+ LF.clearFixups();
// Use evaluateKnownAbsolute for Mach-O as a hack: .subsections_via_symbols
// requires that .uleb128 A-B is foldable where A and B reside in different
// fragments. This is used by __gcc_except_table.
@@ -1027,7 +1029,7 @@ bool MCAssembler::relaxLEB(MCLEBFragment &LF) {
if (UseZeroPad)
Value = 0;
}
- Data.clear();
+ SmallVector<char, 16> Data;
raw_svector_ostream OSE(Data);
// The compiler can generate EH table assembly that is impossible to assemble
// without either adding padding to an LEB fragment or adding extra padding
@@ -1037,7 +1039,8 @@ bool MCAssembler::relaxLEB(MCLEBFragment &LF) {
encodeSLEB128(Value, OSE, PadTo);
else
encodeULEB128(Value, OSE, PadTo);
- return OldSize != LF.getContents().size();
+ LF.setContents(Data);
+ return OldSize != Data.size();
}
/// Check if the branch crosses the boundary.
@@ -1107,19 +1110,19 @@ bool MCAssembler::relaxDwarfLineAddr(MCDwarfLineAddrFragment &DF) {
return WasRelaxed;
MCContext &Context = getContext();
- uint64_t OldSize = DF.getContents().size();
+ auto OldSize = DF.getContents().size();
int64_t AddrDelta;
bool Abs = DF.getAddrDelta().evaluateKnownAbsolute(AddrDelta, *this);
assert(Abs && "We created a line delta with an invalid expression");
(void)Abs;
int64_t LineDelta;
LineDelta = DF.getLineDelta();
- SmallVectorImpl<char> &Data = DF.getContents();
- Data.clear();
- DF.getFixups().clear();
+ SmallVector<char, 8> Data;
MCDwarfLineAddr::encode(Context, getDWARFLinetableParams(), LineDelta,
AddrDelta, Data);
+ DF.setContents(Data);
+ DF.clearFixups();
return OldSize != Data.size();
}
@@ -1138,12 +1141,11 @@ bool MCAssembler::relaxDwarfCallFrameFragment(MCDwarfCallFrameFragment &DF) {
return false;
}
- SmallVectorImpl<char> &Data = DF.getContents();
- uint64_t OldSize = Data.size();
- Data.clear();
- DF.getFixups().clear();
-
+ auto OldSize = DF.getContents().size();
+ SmallVector<char, 8> Data;
MCDwarfFrameEmitter::encodeAdvanceLoc(Context, Value, Data);
+ DF.setContents(Data);
+ DF.clearFixups();
return OldSize != Data.size();
}
@@ -1173,13 +1175,13 @@ bool MCAssembler::relaxPseudoProbeAddr(MCPseudoProbeAddrFragment &PF) {
bool Abs = PF.getAddrDelta().evaluateKnownAbsolute(AddrDelta, *this);
assert(Abs && "We created a pseudo probe with an invalid expression");
(void)Abs;
- SmallVectorImpl<char> &Data = PF.getContents();
- Data.clear();
+ SmallVector<char, 8> Data;
raw_svector_ostream OSE(Data);
- PF.getFixups().clear();
// AddrDelta is a signed integer
encodeSLEB128(AddrDelta, OSE, OldSize);
+ PF.setContents(Data);
+ PF.clearFixups();
return OldSize != Data.size();
}
diff --git a/llvm/lib/MC/MCCodeView.cpp b/llvm/lib/MC/MCCodeView.cpp
index e05374783d2b4..7a50e5c046147 100644
--- a/llvm/lib/MC/MCCodeView.cpp
+++ b/llvm/lib/MC/MCCodeView.cpp
@@ -512,7 +512,7 @@ void CodeViewContext::encodeInlineLineTable(const MCAssembler &Asm,
MCCVFunctionInfo *SiteInfo = getCVFunctionInfo(Frag.SiteFuncId);
- SmallVectorImpl<char> &Buffer = Frag.getContents();
+ SmallVector<char, 0> Buffer;
Buffer.clear(); // Clear old contents if we went through relaxation.
for (const MCCVLoc &Loc : Locs) {
// Exit early if our line table would produce an oversized InlineSiteSym
@@ -606,15 +606,14 @@ void CodeViewContext::encodeInlineLineTable(const MCAssembler &Asm,
compressAnnotation(BinaryAnnotationsOpCode::ChangeCodeLength, Buffer);
compressAnnotation(std::min(EndSymLength, LocAfterLength), Buffer);
+ Frag.setContents(Buffer);
}
void CodeViewContext::encodeDefRange(const MCAssembler &Asm,
MCCVDefRangeFragment &Frag) {
MCContext &Ctx = Asm.getContext();
- SmallVectorImpl<char> &Contents = Frag.getContents();
- Contents.clear();
- SmallVectorImpl<MCFixup> &Fixups = Frag.getFixups();
- Fixups.clear();
+ SmallVector<char, 0> Contents;
+ SmallVector<MCFixup, 0> Fixups;
raw_svector_ostream OS(Contents);
// Compute all the sizes up front.
@@ -694,4 +693,7 @@ void CodeViewContext::encodeDefRange(const MCAssembler &Asm,
GapStartOffset += GapSize + RangeSize;
}
}
+
+ Frag.setContents(Contents);
+ Frag.setFixups(Fixups);
}
diff --git a/llvm/lib/MC/MCELFStreamer.cpp b/llvm/lib/MC/MCELFStreamer.cpp
index 5a514f12ce549..41be72ebeb9ab 100644
--- a/llvm/lib/MC/MCELFStreamer.cpp
+++ b/llvm/lib/MC/MCELFStreamer.cpp
@@ -449,11 +449,13 @@ void MCELFStreamer::emitInstToData(const MCInst &Inst,
// Emit instruction directly into data fragment.
size_t FixupStartIndex = DF->getFixups().size();
size_t CodeOffset = DF->getContents().size();
- Assembler.getEmitter().encodeInstruction(Inst, DF->getContents(),
- DF->getFixups(), STI);
+ SmallVector<MCFixup, 1> Fixups;
+ Assembler.getEmitter().encodeInstruction(Inst, DF->getContentsForAppending(),
+ Fixups, STI);
+ DF->doneAppending();
+ DF->appendFixups(Fixups);
- auto Fixups = MutableArrayRef(DF->getFixups()).slice(FixupStartIndex);
- for (auto &Fixup : Fixups) {
+ for (auto &Fixup : MutableArrayRef(DF->getFixups()).slice(FixupStartIndex)) {
Fixup.setOffset(Fixup.getOffset() + CodeOffset);
if (Fixup.isLinkerRelaxable()) {
DF->setLinkerRelaxable();
diff --git a/llvm/lib/MC/MCFragment.cpp b/llvm/lib/MC/MCFragment.cpp
index d8db55c9a5f5d..859ee73447aad 100644
--- a/llvm/lib/MC/MCFrag...
[truncated]
|
Moved `Contents` and `Fixups` SmallVector storage to MCSection, enabling trivial destructors for all fragment subclasses and eliminating the need for MCFragment::destroy in ~MCSection. For appending content to the current section, use getContentsForAppending. During assembler relaxation, prefer setContents/setFixups, which may involve copying and reduce the benefits of https://reviews.llvm.org/D145791. Moving only Contents out-of-line caused a slight performance regression (Alexis Engelke's 2024 prototype). By also moving Fragments out-of-line, fragment destructors become trivial, resulting in minor instructions:u increase and [modest max-rss decrease](https://llvm-compile-time-tracker.com/compare.php?from=e47d4010d34119c2b4a28e7609fde35449a8b437&to=983887637aaf87fc1ca233a8848a5637fac8f524&stat=max-rss&linkStats=on) for the "stage1-ReleaseLTO-g (link only)" benchmark. Now using a plain SmallVector in MCSection for storage, with potential for future allocator optimizations. (GNU Assembler uses gnulib obstack for fragments.) Co-authored-by: Alexis Engelke <engelke@in.tum.de> Pull Request: llvm#146307
Moved `Contents` and `Fixups` SmallVector storage to MCSection, enabling trivial destructors for all fragment subclasses and eliminating the need for MCFragment::destroy in ~MCSection. For appending content to the current section, use getContentsForAppending. During assembler relaxation, prefer setContents/setFixups, which may involve copying and reduce the benefits of https://reviews.llvm.org/D145791. Moving only Contents out-of-line caused a slight performance regression (Alexis Engelke's 2024 prototype). By also moving Fragments out-of-line, fragment destructors become trivial, resulting in minor instructions:u increase and [modest max-rss decrease](https://llvm-compile-time-tracker.com/compare.php?from=e47d4010d34119c2b4a28e7609fde35449a8b437&to=983887637aaf87fc1ca233a8848a5637fac8f524&stat=max-rss&linkStats=on) for the "stage1-ReleaseLTO-g (link only)" benchmark. Now using a plain SmallVector in MCSection for storage, with potential for future allocator optimizations. (GNU Assembler uses gnulib obstack for fragments.) Co-authored-by: Alexis Engelke <engelke@in.tum.de> Pull Request: llvm#146307
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Generally LGTM. Thanks a lot for finishing this up!
I'm not too happy about the performance loss, but I still think this is a good direction nonetheless. I have some (unbenchmarked) suggestions for perf improvements, maybe we could evaluate these before merging?
llvm/lib/MC/MCObjectStreamer.cpp
Outdated
Fixup.setOffset(Fixup.getOffset() + CodeOffset); | ||
DF->addFixup(Fixup); | ||
} | ||
DF->appendFixups(Fixups); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Check if Fixups is not empty? Most instructions don't have fixups, so we can avoid an out-of-line call in these cases.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done
llvm/lib/MC/MCELFStreamer.cpp
Outdated
Assembler.getEmitter().encodeInstruction(Inst, DF->getContentsForAppending(), | ||
Fixups, STI); | ||
DF->doneAppending(); | ||
DF->appendFixups(Fixups); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Same here. Early return for empty fixups, getFixups below is also out-of-line (setHasInstructions can be moved up).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done
llvm/lib/MC/MCAssembler.cpp
Outdated
@@ -1027,7 +1029,7 @@ bool MCAssembler::relaxLEB(MCLEBFragment &LF) { | |||
if (UseZeroPad) | |||
Value = 0; | |||
} | |||
Data.clear(); | |||
SmallVector<char, 16> Data; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
array + encode*LEB128(Value, Data.data(), PadTo) to avoid raw_ostream overhead.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done
llvm/lib/MC/MCFragment.cpp
Outdated
return cast<MCSectionMachO>(Parent)->getAtom(LayoutOrder); | ||
} | ||
|
||
SmallVectorImpl<char> &MCEncodedFragment::getContentsForAppending() { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe put an inline implementation of the hot path in MCSection.h? (Single check ContentStart + ContentSize != S.size() should suffice.) Same for doneAppending, I expect these are the most performance-critical functions.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
MCSection.h includes MCFragment.h. In MCFragment.h, MCSection is an incomplete type, so we cannot inspect its member.... Perhaps we should merge MCFragment.h into MCSection.h
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Merging is good. (I thought of just adding inline MCFragment::xxx
definitions to MCSection.h, but merging is cleaner.)
llvm/include/llvm/MC/MCFragment.h
Outdated
@@ -125,6 +119,13 @@ class MCEncodedFragment : public MCFragment { | |||
/// It must be non-null for instructions. | |||
const MCSubtargetInfo *STI = nullptr; | |||
|
|||
private: | |||
uint32_t ContentStart = 0; | |||
uint32_t ContentSize = 0; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
One more comment: This limits the size of actual section contents to 2**32-1, I think we supported larger sections before? Maybe we should use size_t here. Depending on usage, we might also want to store start/end instead of start/size to simplify checks w.r.t. ContentStorage.size().
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The content size is limited to 2^32-1, which is somewhat restrictive but necessary to keep the MCDataFragment class size smaller. This is a makeshift. In the future we should try allocating the content as the trailing object of MCDataFragment and adjusting allocFragment
. GNU Assembler uses gnulib obstack to allocate content as part of its fragment structure.
(BTW, BundlePadding
(due to deprecated NaCl) is at offset 56. If we remove NaCl, we can likely make MCDataFragment smaller.)
Created using spr 1.3.5-bogner
Moved `Contents` and `Fixups` SmallVector storage to MCSection, enabling trivial destructors for most fragment subclasses and eliminating the need for MCFragment::destroy in ~MCSection. For appending content to the current section, use getContentsForAppending. During assembler relaxation, prefer setContents/setFixups, which may involve copying and reduce the benefits of https://reviews.llvm.org/D145791. Moving only Contents out-of-line caused a slight performance regression (Alexis Engelke's 2024 prototype). By also moving Fragments out-of-line, fragment destructors become trivial, resulting in minor instructions:u increase and [large max-rss decrease](https://llvm-compile-time-tracker.com/compare.php?from=bb982e733cfcda7e4cfb0583544f68af65211ed1&to=f12d55f97c47717d438951ecddecf8ebd28c296b&stat=max-rss&linkStats=on) for the "stage1-ReleaseLTO-g (link only)" benchmark. Now using plain SmallVector in MCSection for storage, with potential for future allocator optimizations, such as allocating `Contents` as the trailing object of MCDataFragment. (GNU Assembler uses gnulib's obstack for fragment management.) Co-authored-by: Alexis Engelke <engelke@in.tum.de> Pull Request: llvm#146307
Moved `Contents` and `Fixups` SmallVector storage to MCSection, enabling trivial destructors for most fragment subclasses and eliminating the need for MCFragment::destroy in ~MCSection. For appending content to the current section, use getContentsForAppending. During assembler relaxation, prefer setContents/setFixups, which may involve copying and reduce the benefits of https://reviews.llvm.org/D145791. Moving only Contents out-of-line caused a slight performance regression (Alexis Engelke's 2024 prototype). By also moving Fragments out-of-line, fragment destructors become trivial, resulting in minor instructions:u increase and [large max-rss decrease](https://llvm-compile-time-tracker.com/compare.php?from=bb982e733cfcda7e4cfb0583544f68af65211ed1&to=f12d55f97c47717d438951ecddecf8ebd28c296b&stat=max-rss&linkStats=on) for the "stage1-ReleaseLTO-g (link only)" benchmark. Now using plain SmallVector in MCSection for storage, with potential for future allocator optimizations, such as allocating `Contents` as the trailing object of MCDataFragment. (GNU Assembler uses gnulib's obstack for fragment management.) Co-authored-by: Alexis Engelke <engelke@in.tum.de> Pull Request: llvm#146307
Moved `Contents` and `Fixups` SmallVector storage to MCSection, enabling trivial destructors for most fragment subclasses and eliminating the need for MCFragment::destroy in ~MCSection. For appending content to the current section, use getContentsForAppending. During assembler relaxation, prefer setContents/setFixups, which may involve copying and reduce the benefits of https://reviews.llvm.org/D145791. Moving only Contents out-of-line caused a slight performance regression (Alexis Engelke's 2024 prototype). By also moving Fragments out-of-line, fragment destructors become trivial, resulting in minor instructions:u increase and [large max-rss decrease](https://llvm-compile-time-tracker.com/compare.php?from=bb982e733cfcda7e4cfb0583544f68af65211ed1&to=f12d55f97c47717d438951ecddecf8ebd28c296b&stat=max-rss&linkStats=on) for the "stage1-ReleaseLTO-g (link only)" benchmark. Now using plain SmallVector in MCSection for storage, with potential for future allocator optimizations, such as allocating `Contents` as the trailing object of MCDataFragment. (GNU Assembler uses gnulib's obstack for fragment management.) Co-authored-by: Alexis Engelke <engelke@in.tum.de> Pull Request: llvm#146307
llvm/lib/MC/MCCodeView.cpp
Outdated
@@ -512,7 +512,7 @@ void CodeViewContext::encodeInlineLineTable(const MCAssembler &Asm, | |||
|
|||
MCCVFunctionInfo *SiteInfo = getCVFunctionInfo(Frag.SiteFuncId); | |||
|
|||
SmallVectorImpl<char> &Buffer = Frag.getContents(); | |||
SmallVector<char, 0> Buffer; | |||
Buffer.clear(); // Clear old contents if we went through relaxation. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
clear isn't necessary anymore.
llvm/lib/MC/MCFragment.cpp
Outdated
} else if (ContentStart + ContentSize != S.size()) { | ||
// If not empty and not at the storage end, move to the storage end. | ||
auto I = std::exchange(ContentStart, S.size()); | ||
S.reserve(S.size() + ContentSize); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Won't append
do this reserve itself before it copies the range?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
https://reviews.llvm.org/D91744
S.begin() + I
used by append is invalidated upon reallocation. So we have to call reserve
first. I'll improve the comment.
Moved `Contents` and `Fixups` SmallVector storage to MCSection, enabling trivial destructors for most fragment subclasses and eliminating the need for MCFragment::destroy in ~MCSection. For appending content to the current section, use getContentsForAppending. During assembler relaxation, prefer setContents/setFixups, which may involve copying and reduce the benefits of https://reviews.llvm.org/D145791. Moving only Contents out-of-line caused a slight performance regression (Alexis Engelke's 2024 prototype). By also moving Fragments out-of-line, fragment destructors become trivial, resulting in minor instructions:u increase and [large max-rss decrease](https://llvm-compile-time-tracker.com/compare.php?from=bb982e733cfcda7e4cfb0583544f68af65211ed1&to=f12d55f97c47717d438951ecddecf8ebd28c296b&stat=max-rss&linkStats=on) for the "stage1-ReleaseLTO-g (link only)" benchmark. Now using plain SmallVector in MCSection for storage, with potential for future allocator optimizations, such as allocating `Contents` as the trailing object of MCDataFragment. (GNU Assembler uses gnulib's obstack for fragment management.) Co-authored-by: Alexis Engelke <engelke@in.tum.de> Pull Request: llvm#146307
so that ~MCCVInlineLineTableFragment will become trivial when we make ~MCEncodedFragment trivial (llvm#146307).
Moved `Contents` and `Fixups` SmallVector storage to MCSection, enabling trivial destructors for most fragment subclasses and eliminating the need for MCFragment::destroy in ~MCSection. For appending content to the current section, use getContentsForAppending. During assembler relaxation, prefer setContents/setFixups, which may involve copying and reduce the benefits of https://reviews.llvm.org/D145791. Moving only Contents out-of-line caused a slight performance regression (Alexis Engelke's 2024 prototype). By also moving Fragments out-of-line, fragment destructors become trivial, resulting in minor instructions:u increase and [large max-rss decrease](https://llvm-compile-time-tracker.com/compare.php?from=bb982e733cfcda7e4cfb0583544f68af65211ed1&to=f12d55f97c47717d438951ecddecf8ebd28c296b&stat=max-rss&linkStats=on) for the "stage1-ReleaseLTO-g (link only)" benchmark. Now using plain SmallVector in MCSection for storage, with potential for future allocator optimizations, such as allocating `Contents` as the trailing object of MCDataFragment. (GNU Assembler uses gnulib's obstack for fragment management.) Co-authored-by: Alexis Engelke <engelke@in.tum.de> Pull Request: llvm#146307
Created using spr 1.3.5-bogner
Switched to more inline functions. The instructions:u increase for -O0 -g is smaller now: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. I think the compile time loss is now acceptable, compared to the large max-rss wins.
class MCEncodedFragment : public MCFragment { | ||
uint8_t BundlePadding = 0; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think keeping this here would keep the size of an MCEncodedFragment smaller, there is no padding after base classes (MCFragment is 30 bytes). But given that we hopefully can remove it altogether very soon, I don't think this is important now.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks. Kept BundlePadding at the beginning.
stage1-ReleaseLTO-g (link only):
Benchmark | Old | New-- | -- | --
kimwitu++ | 234MiB | 221MiB (-5.37%)
sqlite3 | 201MiB | 182MiB (-9.12%)
consumer-typeset | 154MiB | 144MiB (-6.18%)
Bullet | 198MiB | 193MiB (-2.41%)
tramp3d-v4 | 624MiB | 592MiB (-5.13%)
mafft | 124MiB | 122MiB (-1.49%)
ClamAV | 213MiB | 196MiB (-8.39%)
lencod | 264MiB | 262MiB (-0.98%)
SPASS | 288MiB | 285MiB (-1.13%)
7zip | 432MiB | 397MiB (-8.05%)
geomean | 245MiB | 233MiB (-4.87%)
Moved `Contents` and `Fixups` SmallVector storage to MCSection, enabling trivial destructors for most fragment subclasses and eliminating the need for MCFragment::destroy in ~MCSection. For appending content to the current section, use getContentsForAppending. During assembler relaxation, prefer setContents/setFixups, which may involve copying and reduce the benefits of https://reviews.llvm.org/D145791. Moving only Contents out-of-line caused a slight performance regression (Alexis Engelke's 2024 prototype). By also moving Fragments out-of-line, fragment destructors become trivial, resulting in minor instructions:u increase and [large max-rss decrease](https://llvm-compile-time-tracker.com/compare.php?from=393aebf5c218c74cdad381610ed05bd6d048f532&to=43a18c7c47f9da58dec5f15589ca9b9ecc60f4f7&stat=max-rss&linkStats=on) for the "stage1-ReleaseLTO-g (link only)" benchmark. ( An older version using fewer inline functions: https://llvm-compile-time-tracker.com/compare.php?from=bb982e733cfcda7e4cfb0583544f68af65211ed1&to=f12d55f97c47717d438951ecddecf8ebd28c296b&linkStats=on ) Now using plain SmallVector in MCSection for storage, with potential for future allocator optimizations, such as allocating `Contents` as the trailing object of MCDataFragment. (GNU Assembler uses gnulib's obstack for fragment management.) Co-authored-by: Alexis Engelke <engelke@in.tum.de> Pull Request: llvm#146307
Created using spr 1.3.5-bogner
Moved `Contents` and `Fixups` SmallVector storage to MCSection, enabling trivial destructors for most fragment subclasses and eliminating the need for MCFragment::destroy in ~MCSection. For appending content to the current section, use getContentsForAppending. During assembler relaxation, prefer setContents/setFixups, which may involve copying and reduce the benefits of https://reviews.llvm.org/D145791. Moving only Contents out-of-line caused a slight performance regression (Alexis Engelke's 2024 prototype). By also moving Fragments out-of-line, fragment destructors become trivial, resulting in minor instructions:u increase and [large max-rss decrease](https://llvm-compile-time-tracker.com/compare.php?from=393aebf5c218c74cdad381610ed05bd6d048f532&to=43a18c7c47f9da58dec5f15589ca9b9ecc60f4f7&stat=max-rss&linkStats=on) for the "stage1-ReleaseLTO-g (link only)" benchmark. ( An older version using fewer inline functions: https://llvm-compile-time-tracker.com/compare.php?from=bb982e733cfcda7e4cfb0583544f68af65211ed1&to=f12d55f97c47717d438951ecddecf8ebd28c296b&linkStats=on ) Now using plain SmallVector in MCSection for storage, with potential for future allocator optimizations, such as allocating `Contents` as the trailing object of MCDataFragment. (GNU Assembler uses gnulib's obstack for fragment management.) Co-authored-by: Alexis Engelke <engelke@in.tum.de> Pull Request: llvm#146307
Moved `Contents` and `Fixups` SmallVector storage to MCSection, enabling trivial destructors for most fragment subclasses and eliminating the need for MCFragment::destroy in ~MCSection. For appending content to the current section, use getContentsForAppending. During assembler relaxation, prefer setContents/setFixups, which may involve copying and reduce the benefits of https://reviews.llvm.org/D145791. Moving only Contents out-of-line caused a slight performance regression (Alexis Engelke's 2024 prototype). By also moving Fragments out-of-line, fragment destructors become trivial, resulting in neglgible instructions:u increase for "stage2-O0-g" and [large max-rss decrease](https://llvm-compile-time-tracker.com/compare.php?from=84e82746c3ff63ec23a8b85e9efd4f7fccf92590&to=555a28c0b2f8250a9cf86fd267a04b0460283e15&stat=max-rss&linkStats=on) for the "stage1-ReleaseLTO-g (link only)" benchmark. ( An older version using fewer inline functions: https://llvm-compile-time-tracker.com/compare.php?from=bb982e733cfcda7e4cfb0583544f68af65211ed1&to=f12d55f97c47717d438951ecddecf8ebd28c296b&linkStats=on ) Now using plain SmallVector in MCSection for storage, with potential for future allocator optimizations, such as allocating `Contents` as the trailing object of MCDataFragment. (GNU Assembler uses gnulib's obstack for fragment management.) Co-authored-by: Alexis Engelke <engelke@in.tum.de> Pull Request: llvm/llvm-project#146307
... due to their close relationship. MCSection's inline functions (e.g. iterator) access MCFragment, and we want MCFragment's inline functions to access MCSection similarly (llvm#146307). Pull Request: llvm#146315
…NFC (llvm#146462) so that ~MCCVInlineLineTableFragment will become trivial when we make ~MCEncodedFragment trivial (llvm#146307).
Moved `Contents` and `Fixups` SmallVector storage to MCSection, enabling trivial destructors for most fragment subclasses and eliminating the need for MCFragment::destroy in ~MCSection. For appending content to the current section, use getContentsForAppending. During assembler relaxation, prefer setContents/setFixups, which may involve copying and reduce the benefits of https://reviews.llvm.org/D145791. Moving only Contents out-of-line caused a slight performance regression (Alexis Engelke's 2024 prototype). By also moving Fragments out-of-line, fragment destructors become trivial, resulting in neglgible instructions:u increase for "stage2-O0-g" and [large max-rss decrease](https://llvm-compile-time-tracker.com/compare.php?from=84e82746c3ff63ec23a8b85e9efd4f7fccf92590&to=555a28c0b2f8250a9cf86fd267a04b0460283e15&stat=max-rss&linkStats=on) for the "stage1-ReleaseLTO-g (link only)" benchmark. ( An older version using fewer inline functions: https://llvm-compile-time-tracker.com/compare.php?from=bb982e733cfcda7e4cfb0583544f68af65211ed1&to=f12d55f97c47717d438951ecddecf8ebd28c296b&linkStats=on ) Now using plain SmallVector in MCSection for storage, with potential for future allocator optimizations, such as allocating `Contents` as the trailing object of MCDataFragment. (GNU Assembler uses gnulib's obstack for fragment management.) Co-authored-by: Alexis Engelke <engelke@in.tum.de> Pull Request: llvm#146307
... due to their close relationship. MCSection's inline functions (e.g. iterator) access MCFragment, and we want MCFragment's inline functions to access MCSection similarly (llvm#146307). Pull Request: llvm#146315
…NFC (llvm#146462) so that ~MCCVInlineLineTableFragment will become trivial when we make ~MCEncodedFragment trivial (llvm#146307).
Moved `Contents` and `Fixups` SmallVector storage to MCSection, enabling trivial destructors for most fragment subclasses and eliminating the need for MCFragment::destroy in ~MCSection. For appending content to the current section, use getContentsForAppending. During assembler relaxation, prefer setContents/setFixups, which may involve copying and reduce the benefits of https://reviews.llvm.org/D145791. Moving only Contents out-of-line caused a slight performance regression (Alexis Engelke's 2024 prototype). By also moving Fragments out-of-line, fragment destructors become trivial, resulting in neglgible instructions:u increase for "stage2-O0-g" and [large max-rss decrease](https://llvm-compile-time-tracker.com/compare.php?from=84e82746c3ff63ec23a8b85e9efd4f7fccf92590&to=555a28c0b2f8250a9cf86fd267a04b0460283e15&stat=max-rss&linkStats=on) for the "stage1-ReleaseLTO-g (link only)" benchmark. ( An older version using fewer inline functions: https://llvm-compile-time-tracker.com/compare.php?from=bb982e733cfcda7e4cfb0583544f68af65211ed1&to=f12d55f97c47717d438951ecddecf8ebd28c296b&linkStats=on ) Now using plain SmallVector in MCSection for storage, with potential for future allocator optimizations, such as allocating `Contents` as the trailing object of MCDataFragment. (GNU Assembler uses gnulib's obstack for fragment management.) Co-authored-by: Alexis Engelke <engelke@in.tum.de> Pull Request: llvm#146307
Moved MCInst storage to MCSection, enabling trivial ~MCRelaxableFragment and eliminating the need for a fragment walk in ~MCSection. Follow-up to llvm#146307
Moved MCInst storage to MCSection, enabling trivial ~MCRelaxableFragment and eliminating the need for a fragment walk in ~MCSection. Small max-rss decrese for stage1-ReleaseLTO-g (link only) and negligible instructions:u increase for -O0 -g https://llvm-compile-time-tracker.com/compare.php?from=6a9a16da7a380e18b996bb6573eeb18b913204fc&to=3b9b804a80de7d823b1c087f339b632f9374ffca&stat=max-rss&linkStats=on The instructions:u increase can likely be mitigated if we refactor relaxInstruction to accept Opcode/Operands instead of MCInst. Follow-up to llvm#146307
Moved MCInst storage to MCSection, enabling trivial ~MCRelaxableFragment and eliminating the need for a fragment walk in ~MCSection. Follow-up to llvm#146307
Follow-up to llvm#146307 Moved MCInst storage to MCSection, enabling trivial ~MCRelaxableFragment and eliminating the need for a fragment walk in ~MCSection. Updated MCRelaxableFragment::getInst to construct an MCInst on demand. Modified MCAssembler::relaxInstruction's mayNeedRelaxation to accept opcode and operands instead of an MCInst, avoiding redundant MCInst creation. Note that MCObjectStreamer::emitInstructionImpl calls mayNeedRelaxation before determining the target fragment for the MCInst. Next: Enable MCDataFragment to store optional Opcode/Operands data (when OperandSize != 0), allowing re-encoding of Data+Relax+Data+Relax sequences as Data+Data. The saving should outweigh the downside of larger MCDataFragment.
Follow-up to llvm#146307 Moved MCInst storage to MCSection, enabling trivial ~MCRelaxableFragment and eliminating the need for a fragment walk in ~MCSection. Updated MCRelaxableFragment::getInst to construct an MCInst on demand. Modified MCAssembler::relaxInstruction's mayNeedRelaxation to accept opcode and operands instead of an MCInst, avoiding redundant MCInst creation. Note that MCObjectStreamer::emitInstructionImpl calls mayNeedRelaxation before determining the target fragment for the MCInst. Next: Enable MCDataFragment to store optional Opcode/Operands data (when OperandSize != 0), allowing re-encoding of Data+Relax+Data+Relax sequences as Data+Data. The saving should outweigh the downside of larger MCDataFragment.
Follow-up to llvm#146307 Moved MCInst storage to MCSection, enabling trivial ~MCRelaxableFragment and eliminating the need for a fragment walk in ~MCSection. Updated MCRelaxableFragment::getInst to construct an MCInst on demand. Modified MCAssembler::relaxInstruction's mayNeedRelaxation to accept opcode and operands instead of an MCInst, avoiding redundant MCInst creation. Note that MCObjectStreamer::emitInstructionImpl calls mayNeedRelaxation before determining the target fragment for the MCInst. There is a small decrease in max-rss (stage1-ReleaseLTO-g (link only)) with negligible instructions:u change. https://llvm-compile-time-tracker.com/compare.php?from=408e87184f8274e30d188a2fe198facf55243733&to=5e0997dca139e08c6ace0d404a311e1f1b2d3b3f&stat=max-rss&linkStats=on Next: Enable MCDataFragment to store optional Opcode/Operands data (when OperandSize != 0), allowing re-encoding of Data+Relax+Data+Relax sequences as Data+Data. The saving should outweigh the downside of larger MCDataFragment. Pull Request: llvm#147229
Follow-up to llvm#146307 Moved MCInst storage to MCSection, enabling trivial ~MCRelaxableFragment and eliminating the need for a fragment walk in ~MCSection. Updated MCRelaxableFragment::getInst to construct an MCInst on demand. Modified MCAssembler::relaxInstruction's mayNeedRelaxation to accept opcode and operands instead of an MCInst, avoiding redundant MCInst creation. Note that MCObjectStreamer::emitInstructionImpl calls mayNeedRelaxation before determining the target fragment for the MCInst. There is a small decrease in max-rss (stage1-ReleaseLTO-g (link only)) with negligible instructions:u change. https://llvm-compile-time-tracker.com/compare.php?from=408e87184f8274e30d188a2fe198facf55243733&to=5e0997dca139e08c6ace0d404a311e1f1b2d3b3f&stat=max-rss&linkStats=on Next: Enable MCFragment to store fixed-size data (was MCDataFragment's job) and optional Opcode/Operands data (was MCRelaxableFragment's job), and delete MCDataFragment/MCRelaxableFragment. This will allow re-encoding of Data+Relax+Data+Relax sequences as Frag+Frag. The saving should outweigh the downside of larger MCFragment. Pull Request: llvm#147229
Follow-up to llvm#146307 Moved MCInst storage to MCSection, enabling trivial ~MCRelaxableFragment and eliminating the need for a fragment walk in ~MCSection. Updated MCRelaxableFragment::getInst to construct an MCInst on demand. Modified MCAssembler::relaxInstruction's mayNeedRelaxation to accept opcode and operands instead of an MCInst, avoiding redundant MCInst creation. Note that MCObjectStreamer::emitInstructionImpl calls mayNeedRelaxation before determining the target fragment for the MCInst. Unfortunately, we also have to encode `MCInst::Flags` to support the EVEX prefix, e.g. `{evex} xorw $foo, %ax` There is a small decrease in max-rss (stage1-ReleaseLTO-g (link only)) with negligible instructions:u change. https://llvm-compile-time-tracker.com/compare.php?from=408e87184f8274e30d188a2fe198facf55243733&to=5e0997dca139e08c6ace0d404a311e1f1b2d3b3f&stat=max-rss&linkStats=on Next: Enable MCFragment to store fixed-size data (was MCDataFragment's job) and optional Opcode/Operands data (was MCRelaxableFragment's job), and delete MCDataFragment/MCRelaxableFragment. This will allow re-encoding of Data+Relax+Data+Relax sequences as Frag+Frag. The saving should outweigh the downside of larger MCFragment. Pull Request: llvm#147229
Follow-up to llvm#146307 Moved MCInst storage to MCSection, enabling trivial ~MCRelaxableFragment and eliminating the need for a fragment walk in ~MCSection. Updated MCRelaxableFragment::getInst to construct an MCInst on demand. Modified MCAssembler::relaxInstruction's mayNeedRelaxation to accept opcode and operands instead of an MCInst, avoiding redundant MCInst creation. Note that MCObjectStreamer::emitInstructionImpl calls mayNeedRelaxation before determining the target fragment for the MCInst. Unfortunately, we also have to encode `MCInst::Flags` to support the EVEX prefix, e.g. `{evex} xorw $foo, %ax` There is a small decrease in max-rss (stage1-ReleaseLTO-g (link only)) with negligible instructions:u change. https://llvm-compile-time-tracker.com/compare.php?from=0b533f2d9f0551aaffb13dcac8e0fd0a952185b5&to=f26b57f33bc7ccae749a57dfc841de7ce2acc2ef&stat=max-rss&linkStats=on Next: Enable MCFragment to store fixed-size data (was MCDataFragment's job) and optional Opcode/Operands data (was MCRelaxableFragment's job), and delete MCDataFragment/MCRelaxableFragment. This will allow re-encoding of Data+Relax+Data+Relax sequences as Frag+Frag. The saving should outweigh the downside of larger MCFragment. Pull Request: llvm#147229
Follow-up to #146307 Moved MCInst storage to MCSection, enabling trivial ~MCRelaxableFragment and eliminating the need for a fragment walk in ~MCSection. Updated MCRelaxableFragment::getInst to construct an MCInst on demand. Modified MCAssembler::relaxInstruction's mayNeedRelaxation to accept opcode and operands instead of an MCInst, avoiding redundant MCInst creation. Note that MCObjectStreamer::emitInstructionImpl calls mayNeedRelaxation before determining the target fragment for the MCInst. Unfortunately, we also have to encode `MCInst::Flags` to support the EVEX prefix, e.g. `{evex} xorw $foo, %ax` There is a small decrease in max-rss (stage1-ReleaseLTO-g (link only)) with negligible instructions:u change. https://llvm-compile-time-tracker.com/compare.php?from=0b533f2d9f0551aaffb13dcac8e0fd0a952185b5&to=f26b57f33bc7ccae749a57dfc841de7ce2acc2ef&stat=max-rss&linkStats=on Next: Enable MCFragment to store fixed-size data (was MCDataFragment's job) and optional Opcode/Operands data (was MCRelaxableFragment's job), and delete MCDataFragment/MCRelaxableFragment. This will allow re-encoding of Data+Relax+Data+Relax sequences as Frag+Frag. The saving should outweigh the downside of larger MCFragment. Pull Request: #147229
Moved
Contents
andFixups
SmallVector storage to MCSection, enablingtrivial destructors for most fragment subclasses and eliminating the need
for MCFragment::destroy in ~MCSection.
For appending content to the current section, use
getContentsForAppending. During assembler relaxation, prefer
setContents/setFixups, which may involve copying and reduce the benefits
of https://reviews.llvm.org/D145791.
Moving only Contents out-of-line caused a slight performance regression
(Alexis Engelke's 2024 prototype). By also moving Fragments out-of-line,
fragment destructors become trivial, resulting in
neglgible instructions:u increase for "stage2-O0-g" and large max-rss decrease
for the "stage1-ReleaseLTO-g (link only)" benchmark.
(
An older version using fewer inline functions: https://llvm-compile-time-tracker.com/compare.php?from=bb982e733cfcda7e4cfb0583544f68af65211ed1&to=f12d55f97c47717d438951ecddecf8ebd28c296b&linkStats=on
)
Now using plain SmallVector in MCSection for storage, with potential for
future allocator optimizations, such as allocating
Contents
as thetrailing object of MCDataFragment. (GNU Assembler uses gnulib's obstack
for fragment management.)
Co-authored-by: Alexis Engelke engelke@in.tum.de