[InstrFDO][TypeProf] Implement binary instrumentation and profile read/write #66825
Conversation
@llvm/pr-subscribers-pgo @llvm/pr-subscribers-llvm-analysis
Changes
The changes include:
Patch is 127.87 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/66825.diff
44 Files Affected:
diff --git a/clang/test/CodeGen/coverage-profile-raw-version.c b/clang/test/CodeGen/coverage-profile-raw-version.c
index 749dce50298f025..bb30fd8c1c70ae7 100644
--- a/clang/test/CodeGen/coverage-profile-raw-version.c
+++ b/clang/test/CodeGen/coverage-profile-raw-version.c
@@ -1,8 +1,8 @@
// RUN: %clang_cc1 -debug-info-kind=standalone -fprofile-instrument=clang -fcoverage-mapping -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -debug-info-kind=standalone -mllvm -debug-info-correlate -fprofile-instrument=clang -fcoverage-mapping -emit-llvm -o - %s | FileCheck %s --check-prefix=DEBUG_INFO
-// CHECK: @__llvm_profile_raw_version = {{.*}}constant i64 8
-// DEBUG_INFO: @__llvm_profile_raw_version = {{.*}}constant i64 576460752303423496
+// CHECK: @__llvm_profile_raw_version = {{.*}}constant i64 9
+// DEBUG_INFO: @__llvm_profile_raw_version = {{.*}}constant i64 576460752303423497
int main() {
return 0;
diff --git a/compiler-rt/include/profile/InstrProfData.inc b/compiler-rt/include/profile/InstrProfData.inc
index 4456bf1ab176325..b49f9d983e0f46d 100644
--- a/compiler-rt/include/profile/InstrProfData.inc
+++ b/compiler-rt/include/profile/InstrProfData.inc
@@ -92,6 +92,19 @@ INSTR_PROF_DATA(const uint16_t, Int16ArrayTy, NumValueSites[IPVK_Last+1], \
/* INSTR_PROF_DATA end. */
+#ifndef INSTR_PROF_VTABLE_DATA
+#define INSTR_PROF_VTABLE_DATA(Type, LLVMType, Name, Initializer)
+#else
+#define INSTR_PROF_VTABLE_DATA_DEFINED
+#endif
+INSTR_PROF_VTABLE_DATA(const uint64_t, llvm::Type::getInt64Ty(Ctx), VTableNameHash, \
+ ConstantInt::get(llvm::Type::getInt64Ty(Ctx), IndexedInstrProf::ComputeHash(VTableName)))
+INSTR_PROF_VTABLE_DATA(const IntPtrT, llvm::Type::getInt8PtrTy(Ctx), VTablePointer, VTableAddr)
+INSTR_PROF_VTABLE_DATA(const uint32_t, llvm::Type::getInt32Ty(Ctx), VTableSize, \
+ ConstantInt::get(llvm::Type::getInt32Ty(Ctx), VTableSizeVal))
+#undef INSTR_PROF_VTABLE_DATA
+/* INSTR_PROF_VTABLE_DATA end. */
+
/* This is an internal data structure used by value profiler. It
* is defined here to allow serialization code sharing by LLVM
* to be used in unit test.
@@ -136,6 +149,8 @@ INSTR_PROF_RAW_HEADER(uint64_t, NamesSize, NamesSize)
INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta,
(uintptr_t)CountersBegin - (uintptr_t)DataBegin)
INSTR_PROF_RAW_HEADER(uint64_t, NamesDelta, (uintptr_t)NamesBegin)
+INSTR_PROF_RAW_HEADER(uint64_t, VNamesSize, VNamesSize)
+INSTR_PROF_RAW_HEADER(uint64_t, NumVTables, NumVTables)
INSTR_PROF_RAW_HEADER(uint64_t, ValueKindLast, IPVK_Last)
#undef INSTR_PROF_RAW_HEADER
/* INSTR_PROF_RAW_HEADER end */
@@ -177,13 +192,14 @@ VALUE_PROF_FUNC_PARAM(uint32_t, CounterIndex, Type::getInt32Ty(Ctx))
VALUE_PROF_KIND(IPVK_IndirectCallTarget, 0, "indirect call target")
/* For memory intrinsic functions size profiling. */
VALUE_PROF_KIND(IPVK_MemOPSize, 1, "memory intrinsic functions size")
+VALUE_PROF_KIND(IPVK_VTableTarget, 2, "vtable target")
/* These two kinds must be the last to be
* declared. This is to make sure the string
* array created with the template can be
* indexed with the kind value.
*/
VALUE_PROF_KIND(IPVK_First, IPVK_IndirectCallTarget, "first")
-VALUE_PROF_KIND(IPVK_Last, IPVK_MemOPSize, "last")
+VALUE_PROF_KIND(IPVK_Last, IPVK_VTableTarget, "last")
#undef VALUE_PROF_KIND
/* VALUE_PROF_KIND end */
@@ -270,12 +286,18 @@ INSTR_PROF_SECT_ENTRY(IPSK_cnts, \
INSTR_PROF_SECT_ENTRY(IPSK_name, \
INSTR_PROF_QUOTE(INSTR_PROF_NAME_COMMON), \
INSTR_PROF_NAME_COFF, "__DATA,")
+INSTR_PROF_SECT_ENTRY(IPSK_vname, \
+ INSTR_PROF_QUOTE(INSTR_PROF_VNAME_COMMON), \
+ INSTR_PROF_VNAME_COFF, "__DATA,")
INSTR_PROF_SECT_ENTRY(IPSK_vals, \
INSTR_PROF_QUOTE(INSTR_PROF_VALS_COMMON), \
INSTR_PROF_VALS_COFF, "__DATA,")
INSTR_PROF_SECT_ENTRY(IPSK_vnodes, \
INSTR_PROF_QUOTE(INSTR_PROF_VNODES_COMMON), \
INSTR_PROF_VNODES_COFF, "__DATA,")
+INSTR_PROF_SECT_ENTRY(IPSK_vtab, \
+ INSTR_PROF_QUOTE(INSTR_PROF_VTAB_COMMON), \
+ INSTR_PROF_VTAB_COFF, "__DATA,")
INSTR_PROF_SECT_ENTRY(IPSK_covmap, \
INSTR_PROF_QUOTE(INSTR_PROF_COVMAP_COMMON), \
INSTR_PROF_COVMAP_COFF, "__LLVM_COV,")
@@ -646,9 +668,9 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
/* FIXME: Please remedy the fixme in the header before bumping the version. */
/* Raw profile format version (start from 1). */
-#define INSTR_PROF_RAW_VERSION 8
+#define INSTR_PROF_RAW_VERSION 9
/* Indexed profile format version (start from 1). */
-#define INSTR_PROF_INDEX_VERSION 10
+#define INSTR_PROF_INDEX_VERSION 11
/* Coverage mapping format version (start from 0). */
#define INSTR_PROF_COVMAP_VERSION 5
@@ -686,9 +708,11 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
than WIN32 */
#define INSTR_PROF_DATA_COMMON __llvm_prf_data
#define INSTR_PROF_NAME_COMMON __llvm_prf_names
+#define INSTR_PROF_VNAME_COMMON __llvm_prf_vnames
#define INSTR_PROF_CNTS_COMMON __llvm_prf_cnts
#define INSTR_PROF_VALS_COMMON __llvm_prf_vals
#define INSTR_PROF_VNODES_COMMON __llvm_prf_vnds
+#define INSTR_PROF_VTAB_COMMON __llvm_prf_vtab
#define INSTR_PROF_COVMAP_COMMON __llvm_covmap
#define INSTR_PROF_COVFUN_COMMON __llvm_covfun
#define INSTR_PROF_ORDERFILE_COMMON __llvm_orderfile
@@ -697,9 +721,11 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
*/
#define INSTR_PROF_DATA_COFF ".lprfd$M"
#define INSTR_PROF_NAME_COFF ".lprfn$M"
+#define INSTR_PROF_VNAME_COFF ".lprfn$M"
#define INSTR_PROF_CNTS_COFF ".lprfc$M"
#define INSTR_PROF_VALS_COFF ".lprfv$M"
#define INSTR_PROF_VNODES_COFF ".lprfnd$M"
+#define INSTR_PROF_VTAB_COFF ".lprfvt$M"
#define INSTR_PROF_COVMAP_COFF ".lcovmap$M"
#define INSTR_PROF_COVFUN_COFF ".lcovfun$M"
#define INSTR_PROF_ORDERFILE_COFF ".lorderfile$M"
diff --git a/compiler-rt/lib/profile/InstrProfiling.h b/compiler-rt/lib/profile/InstrProfiling.h
index 4433d7bd48871fc..f3afa694e02c27d 100644
--- a/compiler-rt/lib/profile/InstrProfiling.h
+++ b/compiler-rt/lib/profile/InstrProfiling.h
@@ -38,6 +38,12 @@ typedef struct ValueProfNode {
#include "profile/InstrProfData.inc"
} ValueProfNode;
+typedef void *IntPtrT;
+typedef struct VTableProfData {
+#define INSTR_PROF_VTABLE_DATA(Type, LLVMType, Name, Initializer) Type Name;
+#include "profile/InstrProfData.inc"
+} VTableProfData;
+
/*!
* \brief Return 1 if profile counters are continuously synced to the raw
* profile via an mmap(). This is in contrast to the default mode, in which
@@ -86,10 +92,14 @@ const __llvm_profile_data *__llvm_profile_begin_data(void);
const __llvm_profile_data *__llvm_profile_end_data(void);
const char *__llvm_profile_begin_names(void);
const char *__llvm_profile_end_names(void);
+const char *__llvm_profile_begin_vnames(void);
+const char *__llvm_profile_end_vnames(void);
char *__llvm_profile_begin_counters(void);
char *__llvm_profile_end_counters(void);
ValueProfNode *__llvm_profile_begin_vnodes();
ValueProfNode *__llvm_profile_end_vnodes();
+VTableProfData *__llvm_profile_begin_vtables();
+VTableProfData *__llvm_profile_end_vtables();
uint32_t *__llvm_profile_begin_orderfile();
/*!
@@ -276,6 +286,12 @@ uint64_t __llvm_profile_get_num_counters(const char *Begin, const char *End);
/*! \brief Get the size of the profile counters section in bytes. */
uint64_t __llvm_profile_get_counters_size(const char *Begin, const char *End);
+uint64_t __llvm_profile_get_num_vtable(const VTableProfData *Begin,
+ const VTableProfData *End);
+
+uint64_t __llvm_profile_get_vtable_size(const VTableProfData *Begin,
+ const VTableProfData *End);
+
/* ! \brief Given the sizes of the data and counter information, return the
* number of padding bytes before and after the counters, and after the names,
* in the raw profile.
@@ -287,8 +303,10 @@ uint64_t __llvm_profile_get_counters_size(const char *Begin, const char *End);
*/
void __llvm_profile_get_padding_sizes_for_counters(
uint64_t DataSize, uint64_t CountersSize, uint64_t NamesSize,
+ uint64_t VTableSize, uint64_t VNameSize,
uint64_t *PaddingBytesBeforeCounters, uint64_t *PaddingBytesAfterCounters,
- uint64_t *PaddingBytesAfterNames);
+ uint64_t *PaddingBytesAfterNames, uint64_t *PaddingBytesAfterVTable,
+ uint64_t *PaddingBytesAfterVNames);
/*!
* \brief Set the flag that profile data has been dumped to the file.
diff --git a/compiler-rt/lib/profile/InstrProfilingBuffer.c b/compiler-rt/lib/profile/InstrProfilingBuffer.c
index 61ac5d9c0285002..0c36e40444c7344 100644
--- a/compiler-rt/lib/profile/InstrProfilingBuffer.c
+++ b/compiler-rt/lib/profile/InstrProfilingBuffer.c
@@ -9,6 +9,8 @@
// Note: This is linked into the Darwin kernel, and must remain compatible
// with freestanding compilation. See `darwin_add_builtin_libraries`.
+#include <assert.h>
+
#include "InstrProfiling.h"
#include "InstrProfilingInternal.h"
#include "InstrProfilingPort.h"
@@ -45,9 +47,14 @@ uint64_t __llvm_profile_get_size_for_buffer(void) {
const char *CountersEnd = __llvm_profile_end_counters();
const char *NamesBegin = __llvm_profile_begin_names();
const char *NamesEnd = __llvm_profile_end_names();
+ const VTableProfData *VTableBegin = __llvm_profile_begin_vtables();
+ const VTableProfData *VTableEnd = __llvm_profile_end_vtables();
+ const char *VNamesBegin = __llvm_profile_begin_vnames();
+ const char *VNamesEnd = __llvm_profile_end_vnames();
return __llvm_profile_get_size_for_buffer_internal(
- DataBegin, DataEnd, CountersBegin, CountersEnd, NamesBegin, NamesEnd);
+ DataBegin, DataEnd, CountersBegin, CountersEnd, NamesBegin, NamesEnd,
+ VTableBegin, VTableEnd, VNamesBegin, VNamesEnd);
}
COMPILER_RT_VISIBILITY
@@ -63,6 +70,18 @@ uint64_t __llvm_profile_get_data_size(const __llvm_profile_data *Begin,
const __llvm_profile_data *End) {
return __llvm_profile_get_num_data(Begin, End) * sizeof(__llvm_profile_data);
}
+COMPILER_RT_VISIBILITY
+uint64_t __llvm_profile_get_num_vtable(const VTableProfData *Begin,
+ const VTableProfData *End) {
+ intptr_t EndI = (intptr_t)End, BeginI = (intptr_t)Begin;
+ return (EndI + sizeof(VTableProfData) - 1 - BeginI) / sizeof(VTableProfData);
+}
+
+COMPILER_RT_VISIBILITY
+uint64_t __llvm_profile_get_vtable_size(const VTableProfData *Begin,
+ const VTableProfData *End) {
+ return __llvm_profile_get_num_vtable(Begin, End) * sizeof(VTableProfData);
+}
COMPILER_RT_VISIBILITY size_t __llvm_profile_counter_entry_size(void) {
if (__llvm_profile_get_version() & VARIANT_MASK_BYTE_COVERAGE)
@@ -103,46 +122,68 @@ static int needsCounterPadding(void) {
COMPILER_RT_VISIBILITY
void __llvm_profile_get_padding_sizes_for_counters(
uint64_t DataSize, uint64_t CountersSize, uint64_t NamesSize,
+ uint64_t VTableSize, uint64_t VNameSize,
uint64_t *PaddingBytesBeforeCounters, uint64_t *PaddingBytesAfterCounters,
- uint64_t *PaddingBytesAfterNames) {
+ uint64_t *PaddingBytesAfterNames, uint64_t *PaddingBytesAfterVTable,
+ uint64_t *PaddingBytesAfterVName) {
+ // Counter padding is needed only if continuous mode is enabled.
if (!needsCounterPadding()) {
*PaddingBytesBeforeCounters = 0;
*PaddingBytesAfterCounters =
__llvm_profile_get_num_padding_bytes(CountersSize);
*PaddingBytesAfterNames = __llvm_profile_get_num_padding_bytes(NamesSize);
+ *PaddingBytesAfterVTable = __llvm_profile_get_num_padding_bytes(VTableSize);
+ *PaddingBytesAfterVName = __llvm_profile_get_num_padding_bytes(VNameSize);
return;
}
+ // Value profiling not supported in continuous mode at profile-write time
+ // according to
+ // https://github.com/llvm/llvm-project/blob/e6a007f6b51a661ed3dd8b0210b734b3e9b4354f/compiler-rt/lib/profile/InstrProfilingWriter.c#L328
+ assert(VTableSize == 0 && VNameSize == 0 &&
+ "Value profile not supported for continuous mode");
// In continuous mode, the file offsets for headers and for the start of
// counter sections need to be page-aligned.
*PaddingBytesBeforeCounters =
calculateBytesNeededToPageAlign(sizeof(__llvm_profile_header) + DataSize);
*PaddingBytesAfterCounters = calculateBytesNeededToPageAlign(CountersSize);
*PaddingBytesAfterNames = calculateBytesNeededToPageAlign(NamesSize);
+ // Set these two variables to zero to avoid uninitialized variables
+ // even if VTableSize and VNameSize are asserted to be zero.
+ *PaddingBytesAfterVTable = 0;
+ *PaddingBytesAfterVName = 0;
}
COMPILER_RT_VISIBILITY
uint64_t __llvm_profile_get_size_for_buffer_internal(
const __llvm_profile_data *DataBegin, const __llvm_profile_data *DataEnd,
const char *CountersBegin, const char *CountersEnd, const char *NamesBegin,
- const char *NamesEnd) {
+ const char *NamesEnd, const VTableProfData *VTableBegin,
+ const VTableProfData *VTableEnd, const char *VNamesBegin,
+ const char *VNamesEnd) {
/* Match logic in __llvm_profile_write_buffer(). */
const uint64_t NamesSize = (NamesEnd - NamesBegin) * sizeof(char);
uint64_t DataSize = __llvm_profile_get_data_size(DataBegin, DataEnd);
uint64_t CountersSize =
__llvm_profile_get_counters_size(CountersBegin, CountersEnd);
+ uint64_t VTableSize = __llvm_profile_get_vtable_size(VTableBegin, VTableEnd);
+ uint64_t VNameSize = (VNamesEnd - VNamesBegin) * sizeof(char);
/* Determine how much padding is needed before/after the counters and after
* the names. */
uint64_t PaddingBytesBeforeCounters, PaddingBytesAfterCounters,
- PaddingBytesAfterNames;
+ PaddingBytesAfterNames, PaddingBytesAfterVTable, PaddingBytesAfterVNames;
__llvm_profile_get_padding_sizes_for_counters(
- DataSize, CountersSize, NamesSize, &PaddingBytesBeforeCounters,
- &PaddingBytesAfterCounters, &PaddingBytesAfterNames);
+ DataSize, CountersSize, NamesSize, VTableSize, VNameSize,
+ &PaddingBytesBeforeCounters, &PaddingBytesAfterCounters,
+ &PaddingBytesAfterNames, &PaddingBytesAfterVTable,
+ &PaddingBytesAfterVNames);
return sizeof(__llvm_profile_header) + __llvm_write_binary_ids(NULL) +
DataSize + PaddingBytesBeforeCounters + CountersSize +
- PaddingBytesAfterCounters + NamesSize + PaddingBytesAfterNames;
+ PaddingBytesAfterCounters + NamesSize + PaddingBytesAfterNames +
+ VTableSize + PaddingBytesAfterVTable + VNameSize +
+ PaddingBytesAfterVNames;
}
COMPILER_RT_VISIBILITY
@@ -163,6 +204,9 @@ COMPILER_RT_VISIBILITY int __llvm_profile_write_buffer_internal(
const char *CountersEnd, const char *NamesBegin, const char *NamesEnd) {
ProfDataWriter BufferWriter;
initBufferWriter(&BufferWriter, Buffer);
+ // Set virtual table arguments to NULL since they are not supported yet.
return lprofWriteDataImpl(&BufferWriter, DataBegin, DataEnd, CountersBegin,
- CountersEnd, 0, NamesBegin, NamesEnd, 0);
+ CountersEnd, 0, NamesBegin, NamesEnd,
+ NULL /* VTableBegin */, NULL /* VTableEnd */,
+ NULL /* VNamesBegin */, NULL /* VNamesEnd */, 0);
}
diff --git a/compiler-rt/lib/profile/InstrProfilingInternal.h b/compiler-rt/lib/profile/InstrProfilingInternal.h
index 360165e32ab3fe2..bce333f933f0ffe 100644
--- a/compiler-rt/lib/profile/InstrProfilingInternal.h
+++ b/compiler-rt/lib/profile/InstrProfilingInternal.h
@@ -18,11 +18,16 @@
* pointers to the live data in memory. This function is probably not what you
* want. Use __llvm_profile_get_size_for_buffer instead. Use this function if
* your program has a custom memory layout.
+ * NOTE: The change of function signature requires modifying c source code
+ * as demonstrated by the existing tests. If this is causing backward
+ * compatible issues, considering adding another function for new use cases.
*/
uint64_t __llvm_profile_get_size_for_buffer_internal(
const __llvm_profile_data *DataBegin, const __llvm_profile_data *DataEnd,
const char *CountersBegin, const char *CountersEnd, const char *NamesBegin,
- const char *NamesEnd);
+ const char *NamesEnd, const VTableProfData *VTableBegin,
+ const VTableProfData *VTableEnd, const char *VNamesBegin,
+ const char *VNamesEnd);
/*!
* \brief Write instrumentation data to the given buffer, given explicit
@@ -154,7 +159,9 @@ int lprofWriteDataImpl(ProfDataWriter *Writer,
const __llvm_profile_data *DataEnd,
const char *CountersBegin, const char *CountersEnd,
VPDataReaderType *VPDataReader, const char *NamesBegin,
- const char *NamesEnd, int SkipNameDataWrite);
+ const char *NamesEnd, const VTableProfData *VTableBegin,
+ const VTableProfData *VTableEnd, const char *VNamesBegin,
+ const char *VNamesEnd, int SkipNameDataWrite);
/* Merge value profile data pointed to by SrcValueProfData into
* in-memory profile counters pointed by to DstData. */
diff --git a/compiler-rt/lib/profile/InstrProfilingMerge.c b/compiler-rt/lib/profile/InstrProfilingMerge.c
index 9cf12f251f7262d..2ef6227599ff139 100644
--- a/compiler-rt/lib/profile/InstrProfilingMerge.c
+++ b/compiler-rt/lib/profile/InstrProfilingMerge.c
@@ -124,9 +124,27 @@ int __llvm_profile_merge_from_buffer(const char *ProfileData,
SrcCountersEnd = SrcCountersStart +
Header->NumCounters * __llvm_profile_counter_entry_size();
SrcNameStart = SrcCountersEnd;
- SrcValueProfDataStart =
+ // This is to assume counter size is a multiple of 8 bytes.
+ // uint64_t NamesSize = Header->NamesSize;
+ // uint64_t PaddingBytesAfterNames =
+ // __llvm_profile_get_num_padding_bytes(Header->NamesSize);
+ // First, skip rather than merge them
+ uint64_t VTableSectionSize = Header->NumVTables * sizeof(VTableProfData);
+ uint64_t PaddingBytesAfterVTableSection =
+ __llvm_profile_get_num_padding_bytes(VTableSectionSize);
+ uint64_t VNamesSize = Header->VNamesSize;
+ uint64_t PaddingBytesAfterVNamesSize =
+ __llvm_profile_get_num_padding_bytes(VNamesSize);
+
+ uint64_t VTableProfDataOffset =
SrcNameStart + Header->NamesSize +
__llvm_profile_get_num_padding_bytes(Header->NamesSize);
+
+ uint64_t VTableNamesOffset =
+ VTableProfDataOffset + VTableSectionSize + PaddingBytesAfterVTableSection;
+
+ SrcValueProfDataStart =
+ VTableNamesOffset + VNamesSize + PaddingBytesAfterVNamesSize;
if (SrcNameStart < SrcCountersStart)
return 1;
diff --git a/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c b/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c
index 2cce0a4b2c48d35..dc861632271ce79 100644
--- a/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c
+++ b/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c
@@ -33,8 +33,12 @@
#define PROF_DATA_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_DATA_COMMON)
#define PROF_NAME_START INSTR_PROF_SECT_START(INSTR_PROF_NAME_COMMON)
#define PROF_NAME_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_NAME_COMMON)
+#define PROF_VNAME_START INSTR_PROF_SECT_START(INSTR_PROF_VNAME_COMMON)
+#define PROF_VNAME_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_VNAME_COMMON)
#define PROF_CNTS_START INSTR_PROF_SECT_START(INSTR_PROF_CNTS_COMMON)
#define PROF_CNTS_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_CNTS_COMMON)
+#define PROF_VTABLE_START INSTR_PROF_SECT_START(INSTR_PROF_VTAB_COMMON)
+#define PROF_VTABLE_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_VTAB_COMMON)
#define PROF_ORDERFILE_START INSTR_PROF_SECT_START(INSTR_PROF_ORDERFILE_COMMON)
#define PROF_VNODES_START INSTR_PROF_SECT_START(INSTR_PROF_VNODES_COMMON)
#define PROF_VNODES_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_VNODES_COMMON)
@@ -48,6 +52,10 @@ extern __llvm_profile_data PROF_DATA_STOP COMPILER_RT_VISIBILITY
COMPILER_RT_WEAK;
extern char PROF_CNTS_START COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
extern char PROF_CNTS_STOP COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
+extern VTableProfData PROF_VTABLE_START COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
+extern V...
[truncated]
Just a warning that I am planning to push a significant change to enable MC/DC within the next few hours that will cause some conflicts with what you've done here. I apologize for the inconvenience!
Thanks for the heads up! I expected this might happen and am prepared to resolve merge conflicts ;)
The work sounds interesting. Can you provide a bit more context about it? Will it be used to improve ICP when it's sufficient to just compare the vtable address instead of the vfunc address?
Yes -- it can not only eliminate the vtable load, but also enable target-check combining. More importantly, it can be combined with more aggressive interprocedural type propagation that enables full (unconditional) devirtualization. Example: base->foo();
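For illustration, a minimal C++ sketch of what this could look like (not the patch's codegen; hot_vtable_addr is a hypothetical stand-in for the vtable address recorded in the profile):

struct Base { virtual void foo(); };              // definitions live elsewhere
struct Derived final : Base { void foo() override; };

extern void *hot_vtable_addr; // hypothetical: profiled hot vtable address

void call(Base *base) {
  void *vptr = *reinterpret_cast<void **>(base); // load the vtable pointer once
  if (vptr == hot_vtable_addr) {
    // Hot path: the dynamic type is known, so the virtual call can be
    // devirtualized into a direct call (and possibly inlined).
    static_cast<Derived *>(base)->Derived::foo();
  } else {
    base->foo(); // Cold fallback: ordinary indirect call.
  }
}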
Thanks for the illustration! Have you enabled this in your fleet, and how much performance improvement have you seen? We've also been thinking about similar work based on sample PGO, in both the compiler and BOLT. cc @WenleiHe
I'm prototyping vtable comparison (to conditionally eliminate the additional load and avoid the downside mentioned in https://groups.google.com/g/llvm-dev/c/_1kughXhjIY/m/7fzCmsuVmS8J) and it works for small test programs. I will collect the numbers for macro benchmarks.
There is great performance potential. It depends on the transformation strategy and on other optimizations enabled by the type information. The legacy indirect call promotion for virtual calls also needs to be turned off after this is ready.
Haven't looked at the changes, but from the conversations above this looks like a useful change. Though I think the big picture context/motivation can go into the change summary to help others understand better. Maybe even a small RFC would be helpful to explain the complete plan you have.
Today LLVM doesn't have inter-procedural type propagation yet, right? Or is this something you started working on now?
This is part of the plan -- the first stage is the producer side, and type propagation is one possible consumer. Static type propagation (without VPT) tries to propagate types from object creation sites to uses -- it can be quite limited due to the merging effect. Another static method is virtual function cloning (a clone is a function for which the runtime type is known and can be propagated), where the vtable of the owning type is updated with the clones. The downside is potentially large code bloat.
Interesting work! On my side I was thinking about a different approach using existing profiling and the whole program devirtualization (WPD) framework:
Directly value profiling the vtable captures more opportunities where the member function can exist in more than one vtable; however, getting that information in sample profiling is trickier, which makes the previous approach more appealing.
I tested a prototype (using the simplest heuristic: do vtable comparison only if the vtable distribution is the same as the vfunc distribution) on internal workloads. It shows a statistically significant +0.26% qps improvement on one search workload, CPU usage reductions on two other workloads, and is mostly neutral for a database. The numbers are initial, without tuning (e.g., whether to do vtable comparison if there are two vtable values and one function, etc.).
Thanks for sharing your thoughts! I read a design doc from @teresajohnson that compares these two options; the doc points out that type instrumentation could tell type distributions more accurately (obviously with instrumentation runtime overhead). For example,
Updated the change summary to explain the motivation. I will prepare a small RFC after spending some time tuning the vtable comparison heuristics. Right now the heuristic is strict (one vtable, one func; two vtables, two funcs, etc.).
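As a rough illustration of that strict heuristic (not code from this patch; the helper is hypothetical, built on the existing InstrProfValueData value/count records):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ProfileData/InstrProf.h"

// Hypothetical helper: use vtable comparison only when the profiled vtable
// candidates line up one-to-one with the profiled vfunc candidates.
static bool shouldUseVTableComparison(
    llvm::ArrayRef<llvm::InstrProfValueData> VTableTargets,
    llvm::ArrayRef<llvm::InstrProfValueData> FuncTargets) {
  if (VTableTargets.size() != FuncTargets.size())
    return false;
  for (size_t I = 0, E = VTableTargets.size(); I < E; ++I)
    if (VTableTargets[I].Count != FuncTargets[I].Count)
      return false;
  return true;
}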
Yes, there are tradeoffs to doing this purely with whole program class hierarchy analysis vs with profiled type info, and in fact they can be complementary. For example, the profile info can indicate what order to do the vtable comparisons in (i.e. descending order of hotness, as we do for vfunc comparisons in current ICP), while WP CHA can be used to determine when no fallback is required. Another advantage of doing this with profiling is that it does not require WP visibility, which may be difficult to guarantee.
New changes since the last review, besides clang-format fixes:
1. Delete braces on single-line if/else/loop bodies.
2. In InstrProfReader.cpp, change 'unique_ptr<Type> var = make_unique' to 'auto var = make_unique'.
3. In ValueProfilePlugins.inc, fix comment lines.
…type from GlobalValue to GlobalObject
Besides the non-ELF warning, I did some clean-ups (e.g., deleted braces around one-line if/loop bodies) and merges. Planning to merge this one in by the end of the week if there are no objections or concerns. It has been a long time, and thanks for the reviews on this one and the smaller NFC patches.
Here are some things I noticed, haven't looked at the tests yet.
void visitCallBase(CallBase &Call) {
  if (Call.isIndirectCall())
  if (Call.isIndirectCall()) {
Can we flip this condition and return early to reduce the nesting?
done.
I also updated the code such that 'IndirectCalls' is maintained iff 'Type' is 'kIndirectCall' and 'ProfiledAddresses' is maintained iff 'Type' is 'kVTableVal'.
@@ -429,20 +439,36 @@ uint64_t ComputeHash(StringRef K);
class InstrProfSymtab {
public:
  using AddrHashMap = std::vector<std::pair<uint64_t, uint64_t>>;
  using RangeHashMap =
Hmm, it's a little strange to have HashMap in the name when the underlying data structure is a vector. How about something like VTableAddrRanges?
As the other comments point out, I added a struct to replace the nested pairs and used std::vector<Struct> as the type here.
@@ -429,20 +439,36 @@ uint64_t ComputeHash(StringRef K);
class InstrProfSymtab {
public:
  using AddrHashMap = std::vector<std::pair<uint64_t, uint64_t>>;
  using RangeHashMap =
      std::vector<std::pair<std::pair<uint64_t, uint64_t>, uint64_t>>;
Can you change the element type to a structure with 3 elements? E.g. struct { uint64_t Start, End, Md5Value };
Done, and made the struct definition private to class 'InstrProfSymtab'.
// A map from MD5 keys to function name strings.
std::vector<std::pair<uint64_t, StringRef>> MD5NameMap;
// A map from MD5 keys to virtual table definitions. Only populated when
// building the Symtab from a module.
std::vector<std::pair<uint64_t, GlobalVariable *>> MD5VTableMap;
Having Map in the name is confusing when the data structure is a vector. In this case I think using a DenseMap is better since the keys will be random and spread apart, so it should have better performance than the log(N) operations necessary for lookup in a sorted vector.
I changed the type to DenseMap.
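For context, the suggested member shape is roughly the following (a sketch; the member name comes from the quoted snippet, and the exact value type in the final patch may differ):

#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/GlobalVariable.h"

// MD5(vtable name) -> vtable definition; a DenseMap instead of a sorted vector,
// as suggested above, since the MD5 keys are effectively random.
llvm::DenseMap<uint64_t, llvm::GlobalVariable *> MD5VTableMap;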
if (Error E = addSymbolName(VTableName))
  return E;

// Record VTableName. InstrProfWriter uses this map. The comment around
nit: "uses this set".
done.
std::string VarName = getInstrProfVTableVarPrefix().str() + PGOVTableName;
auto *Data =
    new GlobalVariable(M, DataTy, false /* constant */, Linkage,
nit: /*constant=*/
done.
#include "llvm/ProfileData/InstrProfData.inc"
};

std::string VarName = getInstrProfVTableVarPrefix().str() + PGOVTableName;
The GlobalVariable constructor accepts a Twine so perhaps use that directly in the args?
done.
@@ -327,6 +327,11 @@ extern cl::opt<PGOViewCountsType> PGOViewCounts;
// Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name=
extern cl::opt<std::string> ViewBlockFreqFuncName;

extern cl::opt<bool> DebugInfoCorrelate;
Add a comment for this like the ones above.
Thanks for the catch. As the diff shows, this 'extern' doesn't show up in top-of-the-tree; and I think I forgot to clean this up in 'git merge'. Removed it.
Here are some things I noticed, haven't looked at the tests yet.
@snehasish Thanks for the review!
Talking about tests, I wonder if it looks better if I change the current LLVM IR test under llvm/test/tools/llvm-profdata/ into a compiler-rt test under compiler-rt/test/profile/Linux, as demonstrated in 167d5ce.
A compiler-rt test requires fewer files (e.g., no need to submit a .profraw or its re-generation shell script in the llvm repo) and is more self-contained, but it's put under profile/Linux so people using non-Linux environments won't run it (less coverage compared with an LLVM IR test in this sense). However, the support is limited to ELF.
- The compiler-rt test should work for Darwin (Apple). But since this test involves matching mangled names, the matchers could be exact (no regex matching) if the test could assume it runs only on ELF.
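For reference, a rough sketch of what such a compiler-rt/test/profile/Linux test could look like (the flags mirror the ones used elsewhere in this PR; file names and CHECK prefixes are placeholders):

// RUN: %clangxx_pgogen -fuse-ld=lld -O2 -fprofile-generate=. \
// RUN:   -mllvm -enable-vtable-value-profiling %s -o %t-test
// RUN: env LLVM_PROFILE_FILE=%t-test.profraw %run %t-test
// RUN: llvm-profdata show --function=main --ic-targets --show-vtables \
// RUN:   %t-test.profraw | FileCheck %s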
llvm/lib/ProfileData/InstrProf.cpp
Outdated
uint64_t InstrProfSymtab::getVTableHashFromAddress(uint64_t Address) {
  finalizeSymtab();
  auto It = lower_bound(
      VTableAddrRangeToMD5Map, Address,
(Edited)
IntervalMap could save the {sort, unique, lower_bound} over vectors and the custom struct VTableProfData. I tried to use it; the code might look a little quirky w.r.t. class method interfaces. Other than how the interface turns out, IntervalMap makes more sense to me and I could make the change as described.
a) This line uses the implicit move assignment operator of InstrProfSymtab.
b) The IntervalMap constructor requires an allocator, and the allocator is owned outside of the interval map.
c) The allocator doesn't have an implicit move assignment operator because it defines a destructor.
Considering a) and c), if 'InstrProfSymtab' has the allocator as a member, it needs to implement a move assignment operator. Alternatively, since only the raw profile reader sees profiled addresses and uses this map, the raw profile reader could have an allocator member and pass it to InstrProfSymtab::create for interval map creation. But this way the code doesn't look very clean either.
Re coalescing, I took another look. IntervalMap supports half-open intervals and only coalesces intervals with the same value, so it should work.
(1) IntervalMap coalesces adjacent ranges: [a, b) and [c, d) are considered adjacent if b + 1 == c with the default trait. While vtables in .data.rel.ro may not be adjacent in real binaries, a custom trait which doesn't coalesce adjacent ranges is more foolproof and less likely to go wrong.
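To make the ownership issue concrete, here is a rough sketch (names and the chosen trait are illustrative, not the code in this PR) of an interval map keyed by vtable address ranges, with the allocator owned next to the map -- which is exactly what forces the enclosing class to define its own move semantics:

#include "llvm/ADT/IntervalMap.h"
#include <cstdint>

// Half-open [Start, End) address ranges mapped to the MD5 hash of the vtable name.
using VTableIntervalMap =
    llvm::IntervalMap<uint64_t, uint64_t, 4,
                      llvm::IntervalMapHalfOpenInfo<uint64_t>>;

struct VTableAddrIndex {
  // The allocator must outlive the map; owning both members here means the
  // enclosing class is no longer trivially move-assignable.
  VTableIntervalMap::Allocator Alloc;
  VTableIntervalMap Map{Alloc};

  void add(uint64_t Start, uint64_t End, uint64_t MD5Hash) {
    Map.insert(Start, End, MD5Hash); // insert the half-open range [Start, End)
  }
  uint64_t lookup(uint64_t Addr) const {
    return Map.lookup(Addr, /*NotFound=*/0); // 0 when no vtable covers Addr
  }
};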
My concern with this approach is that compiler-rt is treated as a different project, and updating the code within LLVM makes it easy to miss running the test locally for the other project. I think such issues will be caught by the buildbot, but having them flagged earlier is better for the developer. What do you think?
Yeah, presubmit test coverage makes sense. If it's desirable to get rid of 'update_vtable_value_prof_inputs.sh' and 'vtable-value-prof-basic.profraw' in the repo, here is another way:
- For LLVM IR test coverage, store textual profiles in the repo, and run llvm-profdata merge to convert them to indexed profiles.
- To have test coverage on raw profiles (generating raw profiles or converting them into other formats), have a compiler-rt test.
I wonder if that is preferred over the current status (with the script and .profraw in the repo).
Responses to a couple of other comments -
- For LLVM IR test coverage, store textual profiles in the repo, and run llvm-profdata merge to convert it to indexed profiles.
- To have test coverage on raw profiles (generate raw profiles or convert it into other formats), have a compiler-rt test.
I wonder if that is preferred over the current status (with script and .profraw in the repo).
I think we should resort to scripts and profraw in LLVM if we don't have support for the textual format. This is the case for memprof, for example. If we support vtable names in the text format, I would prefer the additional llvm-profdata merge based testing.
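A sketch of that llvm-profdata merge based flow (input and output file names are placeholders, not the actual test added here):

; RUN: llvm-profdata merge %S/Inputs/vtable-value-prof.proftext -o %t.profdata
; RUN: llvm-profdata show --function=main --ic-targets --show-vtables %t.profdata | FileCheck %s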
Considering a) and c), if 'InstrProfSymtab' has 'allocator' as a member, it needs to implement move assignment operator.
I think this is the way to go if you choose to adopt the IntervalMap suggestion. It seems cleanest if the allocator and the map are owned by the InstrProfSymtab class.
llvm/lib/ProfileData/InstrProf.cpp
Outdated
@@ -560,6 +602,28 @@ Error InstrProfSymtab::addFuncWithName(Function &F, StringRef PGOFuncName) {
  return Error::success();
}

uint64_t InstrProfSymtab::getVTableHashFromAddress(uint64_t Address) {
  finalizeSymtab();
Summarizing the offline discussion - if we choose to refactor, it makes more sense to add appropriate factory methods for the cases where addElement is called. This seems to be the intent of the original implementation with its few different create methods. Then we can make addElement private. This will enforce what we want through a better API rather than runtime checks.
For this patch, we can add a comment and look into the refactor later.
- The direct use case (in [1]) is to add `llvm::IntervalMap` [2] and the allocator required by the IntervalMap ctor [3] to class `InstrProfSymtab` as owned members. The allocator class doesn't have a move-assignment operator, and it would take much effort to implement one for the allocator class such that the enclosing class is movable.
- There is only one use of the compiler-generated move-assignment operator in the repo, which is in CoverageMappingReader.cpp. Luckily it's possible to use std::unique_ptr<InstrProfSymtab> instead, so did the change.
[1] #66825
[2] https://github.com/llvm/llvm-project/blob/4c2f68840e984b0f111779c46845ac00e3a7547d/llvm/include/llvm/ADT/IntervalMap.h#L936
[3] https://github.com/llvm/llvm-project/blob/4c2f68840e984b0f111779c46845ac00e3a7547d/llvm/include/llvm/ADT/IntervalMap.h#L1041
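A minimal sketch of that workaround (the enclosing class and member names are illustrative, not the exact CoverageMappingReader code): holding the symtab behind a unique_ptr moves the pointer instead of the object, so InstrProfSymtab itself never needs to be move-assignable.

#include "llvm/ProfileData/InstrProf.h"
#include <memory>
#include <utility>

class ReaderWithSymtab { // illustrative stand-in for the coverage reader
  std::unique_ptr<llvm::InstrProfSymtab> ProfileNames;

public:
  void setSymtab(std::unique_ptr<llvm::InstrProfSymtab> S) {
    ProfileNames = std::move(S); // moves the pointer, not the symtab
  }
  llvm::InstrProfSymtab *symtab() { return ProfileNames.get(); }
};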
Got it. I updated the test cases. Now the compiler-rt test provides raw-profile test coverage, and the IR test provides {show, merge} test coverage for the {indexed, text} formats.
Agree. With #86882, InstrProfSymtab could own both. Thanks for the discussion!
…is PR and meant for later ones
lgtm
// RUN: env LLVM_PROFILE_FILE=test.profraw ./test

// Show vtable profiles from raw profile.
// RUN: llvm-profdata show --function=main --ic-targets -show-vtables test.profraw | FileCheck %s --check-prefixes=COMMON,RAW
nit: the show-vtables option has a single hyphen here as opposed to double elsewhere.
fixed it.
// RUN: rm -rf %t && split-file %s %t && cd %t
//
// RUN: %clangxx_pgogen -fuse-ld=lld -O2 -g -fprofile-generate=. -mllvm -enable-vtable-value-profiling test.cpp -o test
// RUN: env LLVM_PROFILE_FILE=test.profraw ./test
[optional] If you prefix the test.profraw as %t-test.profraw, then when the test fails and prints out the failed command line, it's easier to copy-paste and run separately to debug. If you want to adopt this, then all the generated files should be prefixed with %t.
done.
// write out an on-disk chained hash table of virtual table names.
// InstrProfWriter stores per function profile data (keyed by function names)
// so it doesn't use a StringSet for function names.
StringSet<> VTableNames;
// A map from MD5 keys to function name strings.
std::vector<std::pair<uint64_t, StringRef>> MD5NameMap;
Might be worth it to simplify this to use DenseMap too in a separate patch.
Sure. I'll send out another patch later.
std::unique_ptr<InstrProfValueData[]> VD =
    R->getValueForSite(IPVK_IndirectCallTarget, 0, &TotalC);

EXPECT_EQ(3U * getProfWeight(), VD[0].Count);
Gunit expectations should be written as EXPECT_EQ(actual, expected). Looks like these are reversed?
Gunit expectations should be written as EXPECT_EQ(actual, expected).
I see. Used EXPECT_EQ(actual, expected) in new lines, and updated some surrounding existing lines to this style in this PR. Will send out a patch later to update the rest of the EXPECT_EQ calls to make the style consistent.
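For example, the convention discussed above would order the arguments like this (values taken from the quoted test snippet):

EXPECT_EQ(VD[0].Count, 3U * getProfWeight()); // actual first, expected second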
The llvm/test/Transforms/PGOProfile/vtable_profile.ll test seems to check the data using a specific version of libz. For folks like me who have to link with an older version of libz (i.e.
Sorry for this. I created #87449 to remove the string check and will wait a while for the presubmit checks and submit it soon.
(The profile format change was split into a standalone change, #81691.)
For InstrFDO value profiling, implement instrumentation and lowering for the virtual table address.
- This is controlled by -enable-vtable-value-profiling and off by default.
- The raw profile carries VTableProfData structs and compressed vtables as payloads.
Implement profile reader and writer support:
- The raw profile reader is used by llvm-profdata but not the compiler. The raw profile reader will construct InstrProfSymtab with symbol names, and map profiled runtime addresses to vtable symbols.
- The indexed profile reader is used by both llvm-profdata and the compiler. When initialized, the reader stores a pointer to the beginning of the in-memory compressed vtable names and the length of the string. When used in llvm-profdata, the reader decompresses the string to show the symbols of a profiled site. When used in the compiler, string decompression doesn't happen since IR is used to construct InstrProfSymtab.
- llvm-profdata show -show-vtables <args> <profile> is implemented.
RFC: https://discourse.llvm.org/t/rfc-dynamic-type-profiling-and-optimizations-in-llvm/74600#pick-instrumentation-points-and-instrument-runtime-types-7