Skip to content

Commit 0537f63

Browse files
committed
Merge tag 'llvmorg-17.0.0-rc4' of https://github.com/llvm/llvm-project into rustc/17.0-2023-07-29
LLVM 17.0.0-rc4 Release
2 parents 50eecd0 + 092b6c5 commit 0537f63

File tree

236 files changed

+5798
-1013
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

236 files changed

+5798
-1013
lines changed

.github/workflows/libclang-abi-tests.yml

+2
Original file line numberDiff line numberDiff line change
@@ -146,10 +146,12 @@ jobs:
146146
uses: actions/download-artifact@v3
147147
with:
148148
name: build-baseline
149+
path: build-baseline
149150
- name: Download latest
150151
uses: actions/download-artifact@v3
151152
with:
152153
name: build-latest
154+
path: build-latest
153155

154156
- name: Install abi-compliance-checker
155157
run: sudo apt-get install abi-compliance-checker

bolt/lib/Rewrite/RewriteInstance.cpp

+3-2
Original file line numberDiff line numberDiff line change
@@ -1844,8 +1844,9 @@ void RewriteInstance::adjustCommandLineOptions() {
18441844
exit(1);
18451845
}
18461846

1847-
if (opts::ReorderFunctions != ReorderFunctions::RT_NONE &&
1848-
!opts::HotText.getNumOccurrences()) {
1847+
if (opts::Instrument ||
1848+
(opts::ReorderFunctions != ReorderFunctions::RT_NONE &&
1849+
!opts::HotText.getNumOccurrences())) {
18491850
opts::HotText = true;
18501851
} else if (opts::HotText && !BC->HasRelocations) {
18511852
errs() << "BOLT-WARNING: hot text is disabled in non-relocation mode\n";

bolt/runtime/common.h

+24
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,20 @@ int memcmp(const void *s1, const void *s2, size_t n) {
165165
// Anonymous namespace covering everything but our library entry point
166166
namespace {
167167

168+
// Returns the load bias of the .text section: the runtime address of the
// __hot_end symbol minus the static address the same symbol has in the section
// header table. Subtracting this value from an arbitrary pc recorded at runtime
// yields the corresponding static address, which in turn can be used to search
// for an indirect call description. This is needed because indirect call
// descriptions are read-only, non-relocatable data.
uint64_t getTextBaseAddress() {
  uint64_t RuntimeAddr;
  uint64_t LinkAddr;
  // leaq forms the address relative to %rip (where the symbol is at runtime),
  // while movabsq loads the symbol's value as a 64-bit immediate.
  __asm__ volatile("leaq __hot_end(%%rip), %[Runtime]\n\t"
                   "movabsq $__hot_end, %[Link]\n\t"
                   : [Runtime] "=r"(RuntimeAddr), [Link] "=r"(LinkAddr));
  return RuntimeAddr - LinkAddr;
}
181+
168182
constexpr uint32_t BufSize = 10240;
169183

170184
#define _STRINGIFY(x) #x
@@ -458,6 +472,16 @@ uint64_t __lseek(uint64_t fd, uint64_t pos, uint64_t whence) {
458472
return ret;
459473
}
460474

475+
/// Raw ftruncate wrapper: issues syscall number 77 with \p fd in %rdi and
/// \p length in %rsi, bypassing libc.
/// \returns the value the kernel leaves in %eax. The caller in this runtime
/// treats 0 as success.
int __ftruncate(uint64_t fd, uint64_t length) {
  int Status;
  // rcx and r11 are declared as clobbers because the syscall instruction
  // overwrites them; "memory" keeps the compiler from reordering memory
  // accesses across the call.
  __asm__ __volatile__("movq $77, %%rax\n"
                       "syscall\n"
                       : "=a"(Status)
                       : "D"(fd), "S"(length)
                       : "cc", "rcx", "r11", "memory");
  return Status;
}
484+
461485
int __close(uint64_t fd) {
462486
uint64_t ret;
463487
__asm__ __volatile__("movq $3, %%rax\n"

bolt/runtime/instr.cpp

+23-10
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,12 @@ class BumpPtrAllocator {
215215
/// __bolt_instr_setup, our initialization routine.
216216
BumpPtrAllocator *GlobalAlloc;
217217

218+
// Base address which we subtract from recorded PC values when searching for
219+
// indirect call description entries. Needed because indCall descriptions are
220+
// mapped read-only and contain static addresses. Initialized in
221+
// __bolt_instr_setup.
222+
uint64_t TextBaseAddress = 0;
223+
218224
// Storage for GlobalAlloc which can be shared if not using
219225
// instrumentation-file-append-pid.
220226
void *GlobalMetadataStorage;
@@ -1389,7 +1395,7 @@ void visitIndCallCounter(IndirectCallHashTable::MapEntry &Entry,
13891395
const IndCallDescription *CallsiteDesc =
13901396
&Ctx->IndCallDescriptions[CallsiteID];
13911397
const IndCallTargetDescription *TargetDesc =
1392-
Ctx->lookupIndCallTarget(Entry.Key);
1398+
Ctx->lookupIndCallTarget(Entry.Key - TextBaseAddress);
13931399
if (!TargetDesc) {
13941400
DEBUG(report("Failed to lookup indirect call target\n"));
13951401
char LineBuf[BufSize];
@@ -1515,20 +1521,22 @@ extern "C" void __bolt_instr_clear_counters() {
15151521
/// on demand.
15161522
///
15171523
extern "C" void __attribute((force_align_arg_pointer))
1518-
__bolt_instr_data_dump() {
1524+
__bolt_instr_data_dump(int FD) {
15191525
// Already dumping
15201526
if (!GlobalWriteProfileMutex->acquire())
15211527
return;
15221528

1529+
int ret = __lseek(FD, 0, SEEK_SET);
1530+
assert(ret == 0, "Failed to lseek!");
1531+
ret = __ftruncate(FD, 0);
1532+
assert(ret == 0, "Failed to ftruncate!");
15231533
BumpPtrAllocator HashAlloc;
15241534
HashAlloc.setMaxSize(0x6400000);
15251535
ProfileWriterContext Ctx = readDescriptions();
15261536
Ctx.CallFlowTable = new (HashAlloc, 0) CallFlowHashTable(HashAlloc);
15271537

15281538
DEBUG(printStats(Ctx));
15291539

1530-
int FD = openProfile();
1531-
15321540
BumpPtrAllocator Alloc;
15331541
Alloc.setMaxSize(0x6400000);
15341542
const uint8_t *FuncDesc = Ctx.FuncDescriptions;
@@ -1544,7 +1552,6 @@ __bolt_instr_data_dump() {
15441552
Ctx.CallFlowTable->forEachElement(visitCallFlowEntry, FD, &Ctx);
15451553

15461554
__fsync(FD);
1547-
__close(FD);
15481555
__munmap(Ctx.MMapPtr, Ctx.MMapSize);
15491556
__close(Ctx.FileDesc);
15501557
HashAlloc.destroy();
@@ -1557,6 +1564,7 @@ __bolt_instr_data_dump() {
15571564
void watchProcess() {
15581565
timespec ts, rem;
15591566
uint64_t Ellapsed = 0ull;
1567+
int FD = openProfile();
15601568
uint64_t ppid;
15611569
if (__bolt_instr_wait_forks) {
15621570
// Store parent pgid
@@ -1568,7 +1576,7 @@ void watchProcess() {
15681576
ppid = __getppid();
15691577
if (ppid == 1) {
15701578
// Parent already dead
1571-
__bolt_instr_data_dump();
1579+
__bolt_instr_data_dump(FD);
15721580
goto out;
15731581
}
15741582
}
@@ -1581,21 +1589,22 @@ void watchProcess() {
15811589
// so no need for us to keep dumping.
15821590
if (__kill(ppid, 0) < 0) {
15831591
if (__bolt_instr_no_counters_clear)
1584-
__bolt_instr_data_dump();
1592+
__bolt_instr_data_dump(FD);
15851593
break;
15861594
}
15871595

15881596
if (++Ellapsed < __bolt_instr_sleep_time)
15891597
continue;
15901598

15911599
Ellapsed = 0;
1592-
__bolt_instr_data_dump();
1600+
__bolt_instr_data_dump(FD);
15931601
if (__bolt_instr_no_counters_clear == false)
15941602
__bolt_instr_clear_counters();
15951603
}
15961604

15971605
out:;
15981606
DEBUG(report("My parent process is dead, bye!\n"));
1607+
__close(FD);
15991608
__exit(0);
16001609
}
16011610

@@ -1606,6 +1615,7 @@ extern "C" void __bolt_instr_indirect_tailcall();
16061615
extern "C" void __attribute((force_align_arg_pointer)) __bolt_instr_setup() {
16071616
__bolt_ind_call_counter_func_pointer = __bolt_instr_indirect_call;
16081617
__bolt_ind_tailcall_counter_func_pointer = __bolt_instr_indirect_tailcall;
1618+
TextBaseAddress = getTextBaseAddress();
16091619

16101620
const uint64_t CountersStart =
16111621
reinterpret_cast<uint64_t>(&__bolt_instr_locations[0]);
@@ -1691,8 +1701,11 @@ extern "C" __attribute((naked)) void __bolt_instr_start()
16911701
/// This is hooking into ELF's DT_FINI
16921702
extern "C" void __bolt_instr_fini() {
16931703
__bolt_fini_trampoline();
1694-
if (__bolt_instr_sleep_time == 0)
1695-
__bolt_instr_data_dump();
1704+
if (__bolt_instr_sleep_time == 0) {
1705+
int FD = openProfile();
1706+
__bolt_instr_data_dump(FD);
1707+
__close(FD);
1708+
}
16961709
DEBUG(report("Finished.\n"));
16971710
}
16981711

bolt/test/lit.cfg.py

+3
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,9 @@
7272
if config.gnu_ld:
7373
config.available_features.add("gnu_ld")
7474

75+
if lit.util.which("fuser"):
76+
config.available_features.add("fuser")
77+
7578
llvm_config.use_default_substitutions()
7679

7780
llvm_config.config.environment["CLANG"] = config.bolt_clang
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
// Check that indirect call hash tables properly register multiple calls,
2+
// and that calls from different processes don't get mixed up when using
3+
// --instrumentation-file-append-pid.
4+
5+
#include <stdio.h>
6+
#include <stdlib.h>
7+
#include <unistd.h>
8+
9+
// Sixteen distinct indirect-call targets. Each one prints its own name
// followed by the pid it was handed, and is marked noinline so that it stays a
// separate function.
__attribute__((noinline)) void funcA(int pid) {
  printf("funcA %d\n", pid);
}
__attribute__((noinline)) void funcB(int pid) {
  printf("funcB %d\n", pid);
}
__attribute__((noinline)) void funcC(int pid) {
  printf("funcC %d\n", pid);
}
__attribute__((noinline)) void funcD(int pid) {
  printf("funcD %d\n", pid);
}
__attribute__((noinline)) void funcE(int pid) {
  printf("funcE %d\n", pid);
}
__attribute__((noinline)) void funcF(int pid) {
  printf("funcF %d\n", pid);
}
__attribute__((noinline)) void funcG(int pid) {
  printf("funcG %d\n", pid);
}
__attribute__((noinline)) void funcH(int pid) {
  printf("funcH %d\n", pid);
}
__attribute__((noinline)) void funcI(int pid) {
  printf("funcI %d\n", pid);
}
__attribute__((noinline)) void funcJ(int pid) {
  printf("funcJ %d\n", pid);
}
__attribute__((noinline)) void funcK(int pid) {
  printf("funcK %d\n", pid);
}
__attribute__((noinline)) void funcL(int pid) {
  printf("funcL %d\n", pid);
}
__attribute__((noinline)) void funcM(int pid) {
  printf("funcM %d\n", pid);
}
__attribute__((noinline)) void funcN(int pid) {
  printf("funcN %d\n", pid);
}
__attribute__((noinline)) void funcO(int pid) {
  printf("funcO %d\n", pid);
}
__attribute__((noinline)) void funcP(int pid) {
  printf("funcP %d\n", pid);
}
25+
26+
int main() {
27+
28+
void (*funcs[])(int) = {funcA, funcB, funcC, funcD, funcE, funcF,
29+
funcG, funcH, funcI, funcJ, funcK, funcL,
30+
funcM, funcN, funcO, funcP};
31+
int i;
32+
33+
switch (fork()) {
34+
case -1:
35+
printf("Failed to fork!\n");
36+
exit(-1);
37+
break;
38+
case 0:
39+
i = 0;
40+
break;
41+
default:
42+
i = 1;
43+
break;
44+
}
45+
int pid = getpid();
46+
for (; i < sizeof(funcs) / sizeof(void *); i += 2) {
47+
funcs[i](pid);
48+
}
49+
50+
return 0;
51+
}
52+
/*
53+
REQUIRES: system-linux,shell,fuser
54+
55+
RUN: %clang %cflags %s -o %t.exe -Wl,-q -pie -fpie
56+
57+
RUN: llvm-bolt %t.exe --instrument --instrumentation-file=%t.fdata \
58+
RUN: --conservative-instrumentation -o %t.instrumented_conservative \
59+
RUN: --instrumentation-sleep-time=1 --instrumentation-no-counters-clear \
60+
RUN: --instrumentation-wait-forks
61+
62+
# Instrumented program needs to finish returning zero
63+
# Both output and profile must contain all 16 functions
64+
RUN: %t.instrumented_conservative > %t.output
65+
# Wait for profile and output to be fully written
66+
RUN: bash %S/wait_file.sh %t.output
67+
RUN: bash %S/wait_file.sh %t.fdata
68+
RUN: cat %t.output | FileCheck %s --check-prefix=CHECK-OUTPUT
69+
RUN: cat %t.fdata | FileCheck %s --check-prefix=CHECK-COMMON-PROF
70+
71+
CHECK-OUTPUT-DAG: funcA
72+
CHECK-OUTPUT-DAG: funcB
73+
CHECK-OUTPUT-DAG: funcC
74+
CHECK-OUTPUT-DAG: funcD
75+
CHECK-OUTPUT-DAG: funcE
76+
CHECK-OUTPUT-DAG: funcF
77+
CHECK-OUTPUT-DAG: funcG
78+
CHECK-OUTPUT-DAG: funcH
79+
CHECK-OUTPUT-DAG: funcI
80+
CHECK-OUTPUT-DAG: funcJ
81+
CHECK-OUTPUT-DAG: funcK
82+
CHECK-OUTPUT-DAG: funcL
83+
CHECK-OUTPUT-DAG: funcM
84+
CHECK-OUTPUT-DAG: funcN
85+
CHECK-OUTPUT-DAG: funcO
86+
CHECK-OUTPUT-DAG: funcP
87+
88+
CHECK-COMMON-PROF-DAG: 1 main {{[0-9a-f]+}} 1 funcA 0 0 1
89+
CHECK-COMMON-PROF-DAG: 1 main {{[0-9a-f]+}} 1 funcB 0 0 1
90+
CHECK-COMMON-PROF-DAG: 1 main {{[0-9a-f]+}} 1 funcC 0 0 1
91+
CHECK-COMMON-PROF-DAG: 1 main {{[0-9a-f]+}} 1 funcD 0 0 1
92+
CHECK-COMMON-PROF-DAG: 1 main {{[0-9a-f]+}} 1 funcE 0 0 1
93+
CHECK-COMMON-PROF-DAG: 1 main {{[0-9a-f]+}} 1 funcF 0 0 1
94+
CHECK-COMMON-PROF-DAG: 1 main {{[0-9a-f]+}} 1 funcG 0 0 1
95+
CHECK-COMMON-PROF-DAG: 1 main {{[0-9a-f]+}} 1 funcH 0 0 1
96+
CHECK-COMMON-PROF-DAG: 1 main {{[0-9a-f]+}} 1 funcI 0 0 1
97+
CHECK-COMMON-PROF-DAG: 1 main {{[0-9a-f]+}} 1 funcJ 0 0 1
98+
CHECK-COMMON-PROF-DAG: 1 main {{[0-9a-f]+}} 1 funcK 0 0 1
99+
CHECK-COMMON-PROF-DAG: 1 main {{[0-9a-f]+}} 1 funcL 0 0 1
100+
CHECK-COMMON-PROF-DAG: 1 main {{[0-9a-f]+}} 1 funcM 0 0 1
101+
CHECK-COMMON-PROF-DAG: 1 main {{[0-9a-f]+}} 1 funcN 0 0 1
102+
CHECK-COMMON-PROF-DAG: 1 main {{[0-9a-f]+}} 1 funcO 0 0 1
103+
CHECK-COMMON-PROF-DAG: 1 main {{[0-9a-f]+}} 1 funcP 0 0 1
104+
105+
RUN: llvm-bolt %t.exe --instrument --instrumentation-file=%t \
106+
RUN: --instrumentation-file-append-pid \
107+
RUN: -o %t.instrumented
108+
109+
RUN: %t.instrumented > %t.output
110+
# Wait till output is fully written in case child outlives parent
111+
RUN: bash %S/wait_file.sh %t.output
112+
# Make sure all functions were called
113+
RUN: cat %t.output | FileCheck %s --check-prefix=CHECK-OUTPUT
114+
115+
RUN: child_pid=$(cat %t.output | grep funcA | awk '{print $2;}')
116+
RUN: par_pid=$(cat %t.output | grep funcB | awk '{print $2;}')
117+
118+
RUN: bash %S/wait_file.sh %t.$child_pid.fdata
119+
RUN: bash %S/wait_file.sh %t.$par_pid.fdata
120+
121+
RUN: mv %t.$child_pid.fdata %t.child.fdata
122+
RUN: mv %t.$par_pid.fdata %t.parent.fdata
123+
124+
# Instrumented binary must produce two profiles with only local calls
125+
# recorded. Functions called only in child should not appear in parent's
126+
# profile and vice versa.
127+
RUN: cat %t.child.fdata | FileCheck %s --check-prefix=CHECK-CHILD
128+
RUN: cat %t.child.fdata | FileCheck %s --check-prefix=CHECK-NOCHILD
129+
RUN: cat %t.parent.fdata | FileCheck %s --check-prefix=CHECK-PARENT
130+
RUN: cat %t.parent.fdata | FileCheck %s --check-prefix=CHECK-NOPARENT
131+
132+
CHECK-CHILD-DAG: 1 main {{[0-9a-f]+}} 1 funcA 0 0 1
133+
CHECK-CHILD-DAG: 1 main {{[0-9a-f]+}} 1 funcC 0 0 1
134+
CHECK-CHILD-DAG: 1 main {{[0-9a-f]+}} 1 funcE 0 0 1
135+
CHECK-CHILD-DAG: 1 main {{[0-9a-f]+}} 1 funcG 0 0 1
136+
CHECK-CHILD-DAG: 1 main {{[0-9a-f]+}} 1 funcI 0 0 1
137+
CHECK-CHILD-DAG: 1 main {{[0-9a-f]+}} 1 funcK 0 0 1
138+
CHECK-CHILD-DAG: 1 main {{[0-9a-f]+}} 1 funcM 0 0 1
139+
CHECK-CHILD-DAG: 1 main {{[0-9a-f]+}} 1 funcO 0 0 1
140+
141+
CHECK-NOCHILD-NOT: funcB
142+
CHECK-NOCHILD-NOT: funcD
143+
CHECK-NOCHILD-NOT: funcF
144+
CHECK-NOCHILD-NOT: funcH
145+
CHECK-NOCHILD-NOT: funcJ
146+
CHECK-NOCHILD-NOT: funcL
147+
CHECK-NOCHILD-NOT: funcN
148+
CHECK-NOCHILD-NOT: funcP
149+
150+
CHECK-PARENT-DAG: 1 main {{[0-9a-f]+}} 1 funcB 0 0 1
151+
CHECK-PARENT-DAG: 1 main {{[0-9a-f]+}} 1 funcD 0 0 1
152+
CHECK-PARENT-DAG: 1 main {{[0-9a-f]+}} 1 funcF 0 0 1
153+
CHECK-PARENT-DAG: 1 main {{[0-9a-f]+}} 1 funcH 0 0 1
154+
CHECK-PARENT-DAG: 1 main {{[0-9a-f]+}} 1 funcJ 0 0 1
155+
CHECK-PARENT-DAG: 1 main {{[0-9a-f]+}} 1 funcL 0 0 1
156+
CHECK-PARENT-DAG: 1 main {{[0-9a-f]+}} 1 funcN 0 0 1
157+
CHECK-PARENT-DAG: 1 main {{[0-9a-f]+}} 1 funcP 0 0 1
158+
159+
CHECK-NOPARENT-NOT: funcA
160+
CHECK-NOPARENT-NOT: funcC
161+
CHECK-NOPARENT-NOT: funcE
162+
CHECK-NOPARENT-NOT: funcG
163+
CHECK-NOPARENT-NOT: funcI
164+
CHECK-NOPARENT-NOT: funcK
165+
CHECK-NOPARENT-NOT: funcM
166+
CHECK-NOPARENT-NOT: funcO
167+
168+
*/

0 commit comments

Comments
 (0)