Skip to content

Commit 72ce50b

Browse files
committed
Merge remote-tracking branch 'intel/sycl' into cpu-range-reduction-performance
2 parents 1726491 + fe18839 commit 72ce50b

File tree

10,735 files changed

+648523
-851204
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

10,735 files changed

+648523
-851204
lines changed

.github/workflows/sycl_nightly.yml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,16 @@ jobs:
1919
build_configure_extra_args: ''
2020
lts_config: "ocl_gen9;ocl_x64"
2121

22+
ubuntu2004_opaque_pointers_build_test:
23+
if: github.repository == 'intel/llvm'
24+
uses: ./.github/workflows/sycl_linux_build_and_test.yml
25+
with:
26+
build_cache_root: "/__w/"
27+
build_cache_suffix: opaque_pointers
28+
build_artifact_suffix: opaque_pointers
29+
build_configure_extra_args: "--hip --cuda --enable-esimd-emulator --cmake-opt=-DDPCPP_ENABLE_OPAQUE_POINTERS=TRUE"
30+
lts_config: "ocl_gen9;ocl_x64"
31+
2232
windows_default:
2333
name: Windows
2434
if: github.repository == 'intel/llvm'

bolt/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,7 @@ Once you have `perf.fdata` ready, you can use it for optimizations with
180180
BOLT. Assuming your environment is setup to include the right path, execute
181181
`llvm-bolt`:
182182
```
183-
$ llvm-bolt <executable> -o <executable>.bolt -data=perf.fdata -reorder-blocks=ext-tsp -reorder-functions=hfsort -split-functions=2 -split-all-cold -split-eh -dyno-stats
183+
$ llvm-bolt <executable> -o <executable>.bolt -data=perf.fdata -reorder-blocks=ext-tsp -reorder-functions=hfsort -split-functions -split-all-cold -split-eh -dyno-stats
184184
```
185185

186186
If you do need an updated debug info, then add `-update-debug-sections` option

bolt/docs/OptimizingClang.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ Notice that we are passing `clang-7` to `perf2bolt` which is the real binary tha
6464
the generated profile:
6565
```bash
6666
$ llvm-bolt $CPATH/clang-7 -o $CPATH/clang-7.bolt -b clang-7.yaml \
67-
-reorder-blocks=ext-tsp -reorder-functions=hfsort+ -split-functions=3 \
67+
-reorder-blocks=ext-tsp -reorder-functions=hfsort+ -split-functions \
6868
-split-all-cold -dyno-stats -icf=1 -use-gnu-stack
6969
```
7070
The output will look similar to the one below:

bolt/include/bolt/Core/BinaryBasicBlock.h

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#ifndef BOLT_CORE_BINARY_BASIC_BLOCK_H
1616
#define BOLT_CORE_BINARY_BASIC_BLOCK_H
1717

18+
#include "bolt/Core/FunctionLayout.h"
1819
#include "bolt/Core/MCPlus.h"
1920
#include "llvm/ADT/GraphTraits.h"
2021
#include "llvm/ADT/StringRef.h"
@@ -634,14 +635,12 @@ class BinaryBasicBlock {
634635

635636
/// Test if BB is a predecessor of this block.
636637
bool isPredecessor(const BinaryBasicBlock *BB) const {
637-
auto Itr = std::find(Predecessors.begin(), Predecessors.end(), BB);
638-
return Itr != Predecessors.end();
638+
return llvm::is_contained(Predecessors, BB);
639639
}
640640

641641
/// Test if BB is a successor of this block.
642642
bool isSuccessor(const BinaryBasicBlock *BB) const {
643-
auto Itr = std::find(Successors.begin(), Successors.end(), BB);
644-
return Itr != Successors.end();
643+
return llvm::is_contained(Successors, BB);
645644
}
646645

647646
/// Test if this BB has a valid execution count.
@@ -673,6 +672,10 @@ class BinaryBasicBlock {
673672

674673
void markValid(const bool Valid) { IsValid = Valid; }
675674

675+
FragmentNum getFragmentNum() const {
676+
return IsCold ? FragmentNum::cold() : FragmentNum::hot();
677+
}
678+
676679
bool isCold() const { return IsCold; }
677680

678681
void setIsCold(const bool Flag) { IsCold = Flag; }

bolt/include/bolt/Core/BinaryContext.h

Lines changed: 42 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
#include "llvm/Support/ErrorOr.h"
4040
#include "llvm/Support/raw_ostream.h"
4141
#include <functional>
42+
#include <list>
4243
#include <map>
4344
#include <set>
4445
#include <shared_mutex>
@@ -199,7 +200,7 @@ class BinaryContext {
199200
uint32_t DuplicatedJumpTables{0x10000000};
200201

201202
/// Function fragments to skip.
202-
std::vector<BinaryFunction *> FragmentsToSkip;
203+
std::unordered_set<BinaryFunction *> FragmentsToSkip;
203204

204205
/// The runtime library.
205206
std::unique_ptr<RuntimeLibrary> RtLibrary;
@@ -235,6 +236,18 @@ class BinaryContext {
235236
MIB = std::move(TargetBuilder);
236237
}
237238

239+
/// Return function fragments to skip.
240+
const std::unordered_set<BinaryFunction *> &getFragmentsToSkip() {
241+
return FragmentsToSkip;
242+
}
243+
244+
/// Add function fragment to skip
245+
void addFragmentsToSkip(BinaryFunction *Function) {
246+
FragmentsToSkip.insert(Function);
247+
}
248+
249+
void clearFragmentsToSkip() { FragmentsToSkip.clear(); }
250+
238251
/// Given DWOId returns CU if it exists in DWOCUs.
239252
Optional<DWARFUnit *> getDWOCU(uint64_t DWOId);
240253

@@ -475,15 +488,15 @@ class BinaryContext {
475488
/// If \p NextJTAddress is different from zero, it is used as an upper
476489
/// bound for jump table memory layout.
477490
///
478-
/// Optionally, populate \p Offsets with jump table entries. The entries
491+
/// Optionally, populate \p EntriesAsAddress with jump table entries. The entries
479492
/// could be partially populated if the jump table detection fails.
480493
bool analyzeJumpTable(const uint64_t Address,
481494
const JumpTable::JumpTableType Type, BinaryFunction &BF,
482495
const uint64_t NextJTAddress = 0,
483-
JumpTable::OffsetsType *Offsets = nullptr);
496+
JumpTable::AddressesType *EntriesAsAddress = nullptr);
484497

485498
/// After jump table locations are established, this function will populate
486-
/// their OffsetEntries based on memory contents.
499+
/// their EntriesAsAddress based on memory contents.
487500
void populateJumpTables();
488501

489502
/// Returns a jump table ID and label pointing to the duplicated jump table.
@@ -498,12 +511,12 @@ class BinaryContext {
498511
/// to function \p BF.
499512
std::string generateJumpTableName(const BinaryFunction &BF, uint64_t Address);
500513

501-
/// Free memory used by jump table offsets
502-
void clearJumpTableOffsets() {
514+
/// Free memory used by JumpTable's EntriesAsAddress
515+
void clearJumpTableTempData() {
503516
for (auto &JTI : JumpTables) {
504517
JumpTable &JT = *JTI.second;
505-
JumpTable::OffsetsType Temp;
506-
Temp.swap(JT.OffsetEntries);
518+
JumpTable::AddressesType Temp;
519+
Temp.swap(JT.EntriesAsAddress);
507520
}
508521
}
509522
/// Return true if the array of bytes represents a valid code padding.
@@ -641,6 +654,10 @@ class BinaryContext {
641654
/// special linux kernel sections
642655
std::unordered_map<uint64_t, std::vector<LKInstructionMarkerInfo>> LKMarkers;
643656

657+
/// List of external addresses in the code that are not a function start
658+
/// and are referenced from BinaryFunction.
659+
std::list<std::pair<BinaryFunction *, uint64_t>> InterproceduralReferences;
660+
644661
/// PseudoProbe decoder
645662
MCPseudoProbeDecoder ProbeDecoder;
646663

@@ -884,8 +901,23 @@ class BinaryContext {
884901
bool registerFragment(BinaryFunction &TargetFunction,
885902
BinaryFunction &Function) const;
886903

887-
/// Resolve inter-procedural dependencies from \p Function.
888-
void processInterproceduralReferences(BinaryFunction &Function);
904+
/// Add interprocedural reference for \p Function to \p Address
905+
void addInterproceduralReference(BinaryFunction *Function, uint64_t Address) {
906+
InterproceduralReferences.push_back({Function, Address});
907+
}
908+
909+
/// Used to fix the target of linker-generated AArch64 adrp + add
910+
/// sequence with no relocation info.
911+
void addAdrpAddRelocAArch64(BinaryFunction &BF, MCInst &LoadLowBits,
912+
MCInst &LoadHiBits, uint64_t Target);
913+
914+
/// Return true if AArch64 veneer was successfully matched at a given
915+
/// \p Address and register veneer binary function if \p MatchOnly
916+
/// argument is false.
917+
bool handleAArch64Veneer(uint64_t Address, bool MatchOnly = false);
918+
919+
/// Resolve inter-procedural dependencies.
920+
void processInterproceduralReferences();
889921

890922
/// Skip functions with all parent and child fragments transitively.
891923
void skipMarkedFragments();

bolt/include/bolt/Core/BinaryData.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ class BinaryData {
112112
bool nameStartsWith(StringRef Prefix) const;
113113

114114
bool hasSymbol(const MCSymbol *Symbol) const {
115-
return std::find(Symbols.begin(), Symbols.end(), Symbol) != Symbols.end();
115+
return llvm::is_contained(Symbols, Symbol);
116116
}
117117

118118
bool isAbsolute() const;

0 commit comments

Comments
 (0)