-
Notifications
You must be signed in to change notification settings - Fork 12.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[clang] Add flag to experiment with cold function attributes #88793
Closed
Conversation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
llvmbot
added
clang
Clang issues not falling into any other category
clang:codegen
labels
Apr 15, 2024
@llvm/pr-subscribers-clang @llvm/pr-subscribers-clang-codegen Author: Arthur Eubanks (aeubanks) ChangesTo be removed and promoted to a proper driver flag if experiments turn out fruitful. Original LLVM patch for this functionality: #69030 Full diff: https://github.com/llvm/llvm-project/pull/88793.diff 1 Files Affected:
diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index 6cc00b85664f41..22c3f8642ad8eb 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -104,6 +104,21 @@ static cl::opt<bool> ClSanitizeOnOptimizerEarlyEP(
"sanitizer-early-opt-ep", cl::Optional,
cl::desc("Insert sanitizers on OptimizerEarlyEP."));
+// Experiment to mark cold functions as optsize/minsize/optnone.
+// TODO: remove once this is exposed as a proper driver flag.
+static cl::opt<PGOOptions::ColdFuncOpt> ClPGOColdFuncAttr(
+ "pgo-cold-func-opt", cl::init(PGOOptions::ColdFuncOpt::Default), cl::Hidden,
+ cl::desc(
+ "Function attribute to apply to cold functions as determined by PGO"),
+ cl::values(clEnumValN(PGOOptions::ColdFuncOpt::Default, "default",
+ "Default (no attribute)"),
+ clEnumValN(PGOOptions::ColdFuncOpt::OptSize, "optsize",
+ "Mark cold functions with optsize."),
+ clEnumValN(PGOOptions::ColdFuncOpt::MinSize, "minsize",
+ "Mark cold functions with minsize."),
+ clEnumValN(PGOOptions::ColdFuncOpt::OptNone, "optnone",
+ "Mark cold functions with optnone.")));
+
extern cl::opt<InstrProfCorrelator::ProfCorrelatorKind> ProfileCorrelate;
// Re-link builtin bitcodes after optimization
@@ -768,42 +783,41 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
CodeGenOpts.InstrProfileOutput.empty() ? getDefaultProfileGenName()
: CodeGenOpts.InstrProfileOutput,
"", "", CodeGenOpts.MemoryProfileUsePath, nullptr, PGOOptions::IRInstr,
- PGOOptions::NoCSAction, PGOOptions::ColdFuncOpt::Default,
+ PGOOptions::NoCSAction, ClPGOColdFuncAttr,
CodeGenOpts.DebugInfoForProfiling,
/*PseudoProbeForProfiling=*/false, CodeGenOpts.AtomicProfileUpdate);
else if (CodeGenOpts.hasProfileIRUse()) {
// -fprofile-use.
auto CSAction = CodeGenOpts.hasProfileCSIRUse() ? PGOOptions::CSIRUse
: PGOOptions::NoCSAction;
- PGOOpt = PGOOptions(
- CodeGenOpts.ProfileInstrumentUsePath, "",
- CodeGenOpts.ProfileRemappingFile, CodeGenOpts.MemoryProfileUsePath, VFS,
- PGOOptions::IRUse, CSAction, PGOOptions::ColdFuncOpt::Default,
- CodeGenOpts.DebugInfoForProfiling);
+ PGOOpt = PGOOptions(CodeGenOpts.ProfileInstrumentUsePath, "",
+ CodeGenOpts.ProfileRemappingFile,
+ CodeGenOpts.MemoryProfileUsePath, VFS,
+ PGOOptions::IRUse, CSAction, ClPGOColdFuncAttr,
+ CodeGenOpts.DebugInfoForProfiling);
} else if (!CodeGenOpts.SampleProfileFile.empty())
// -fprofile-sample-use
PGOOpt = PGOOptions(
CodeGenOpts.SampleProfileFile, "", CodeGenOpts.ProfileRemappingFile,
CodeGenOpts.MemoryProfileUsePath, VFS, PGOOptions::SampleUse,
- PGOOptions::NoCSAction, PGOOptions::ColdFuncOpt::Default,
+ PGOOptions::NoCSAction, ClPGOColdFuncAttr,
CodeGenOpts.DebugInfoForProfiling, CodeGenOpts.PseudoProbeForProfiling);
else if (!CodeGenOpts.MemoryProfileUsePath.empty())
// -fmemory-profile-use (without any of the above options)
PGOOpt = PGOOptions("", "", "", CodeGenOpts.MemoryProfileUsePath, VFS,
PGOOptions::NoAction, PGOOptions::NoCSAction,
- PGOOptions::ColdFuncOpt::Default,
- CodeGenOpts.DebugInfoForProfiling);
+ ClPGOColdFuncAttr, CodeGenOpts.DebugInfoForProfiling);
else if (CodeGenOpts.PseudoProbeForProfiling)
// -fpseudo-probe-for-profiling
- PGOOpt = PGOOptions("", "", "", /*MemoryProfile=*/"", nullptr,
- PGOOptions::NoAction, PGOOptions::NoCSAction,
- PGOOptions::ColdFuncOpt::Default,
- CodeGenOpts.DebugInfoForProfiling, true);
+ PGOOpt =
+ PGOOptions("", "", "", /*MemoryProfile=*/"", nullptr,
+ PGOOptions::NoAction, PGOOptions::NoCSAction,
+ ClPGOColdFuncAttr, CodeGenOpts.DebugInfoForProfiling, true);
else if (CodeGenOpts.DebugInfoForProfiling)
// -fdebug-info-for-profiling
PGOOpt = PGOOptions("", "", "", /*MemoryProfile=*/"", nullptr,
PGOOptions::NoAction, PGOOptions::NoCSAction,
- PGOOptions::ColdFuncOpt::Default, true);
+ ClPGOColdFuncAttr, true);
// Check to see if we want to generate a CS profile.
if (CodeGenOpts.hasProfileCSIRInstr()) {
@@ -820,14 +834,13 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
: CodeGenOpts.InstrProfileOutput;
PGOOpt->CSAction = PGOOptions::CSIRInstr;
} else
- PGOOpt =
- PGOOptions("",
- CodeGenOpts.InstrProfileOutput.empty()
- ? getDefaultProfileGenName()
- : CodeGenOpts.InstrProfileOutput,
- "", /*MemoryProfile=*/"", nullptr, PGOOptions::NoAction,
- PGOOptions::CSIRInstr, PGOOptions::ColdFuncOpt::Default,
- CodeGenOpts.DebugInfoForProfiling);
+ PGOOpt = PGOOptions("",
+ CodeGenOpts.InstrProfileOutput.empty()
+ ? getDefaultProfileGenName()
+ : CodeGenOpts.InstrProfileOutput,
+ "", /*MemoryProfile=*/"", nullptr,
+ PGOOptions::NoAction, PGOOptions::CSIRInstr,
+ ClPGOColdFuncAttr, CodeGenOpts.DebugInfoForProfiling);
}
if (TM)
TM->setPGOOption(PGOOpt);
|
…m#88751) Fixes llvm#88478 Promoting the `EHCleanup` to `NormalAndEHCleanup` in `EmitCallArgs` surfaced another bug with deactivation of normal cleanups. Here we missed emitting CPP scope ends for deactivated normal cleanups. This patch also fixes that bug. We missed emitting CPP scope ends because we remove the `fallthrough` (clears the insertion point) before deactivating normal cleanups. This is to make the emitted "normal" cleanup code unreachable. But we still need to emit CPP scope ends in the original basic block even for a deactivated normal cleanup. (This worked correctly before we did not remove `fallthrough` for `EHCleanup`s).
We were calling classfiyPrim() instead of classify().
This gives us one extra SGPR to play with. The comment suggested that it could cause bugs, but I have tested it with Vulkan CTS with the default wave size for compute shaders set to 32 and did not find any problems.
…Y) & NegPow2C` (llvm#88859) Alive2: https://alive2.llvm.org/ce/z/NFtkSX This optimization will be beneficial to jemalloc users.
llvm-project/mlir/test/lib/Dialect/Test/TestDialect.cpp:597:31: error: comparison of integers of different signs: 'size_t' (aka 'unsigned long') and 'int64_t' (aka 'long') [-Werror,-Wsign-compare] if (getVarOperands().size() != expectedNumOperands) ~~~~~~~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~~~~~~~~~ 1 error generated.
This patch updates the definition of `omp.distribute` to enforce the restrictions of a wrapper operation.
Re-land llvm#88395 Two build-bots were broken by the old version: - https://lab.llvm.org/buildbot/#/builders/285/builds/245 - https://lab.llvm.org/buildbot/#/builders/21/builds/96988 The problem in both cases was that the compiler did not support `std::filesystem` (which I use in the unit test). I have removed the dependency upon std::filesystem because there isn't an easy way to add the right linker options so that this is supported correctly in all build environments [1] [1] https://gitlab.kitware.com/cmake/cmake/-/issues/17834 --- This is a GNU extension: https://gcc.gnu.org/onlinedocs/gfortran/ACCESS.html Used in SALMON: https://salmon-tddft.jp/download.html Unfortunately the intrinsic takes a file path to operate on so there isn't an easy way to make the test robust. The unit test expects to be able to create, set read write and execute permissions, and delete files called std::filesystem::temp_directory_path() / <test_name>.<pid> The test will fail if a file already exists with that name. I have not implemented the intrinsic on Windows because this is wrapping a POSIX system call and Windows doesn't support all of the permission bits tested by the intrinsic. I don't have a Windows machine easily available to check if Gfortran implements this intrinsic on Windows.
The old code was replaced by llvm#80019.
A va_start intrinsic lowers to something derived from the variadic parameter to the function. If there is no such parameter, it can't lower meaningfully. Clang sema rejects the same with `error: 'va_start' used in function with fixed args`. Moves the existing lint warning into a verifier error. Updates the one lit test that had a va_start in a non-variadic function.
Instead of iterating over potential induction var uses looking for suitable `arith.addi`, try to trace it back from yield argument.
This patch updates the definition of `omp.taskloop` to enforce the restrictions of a wrapper operation.
We don't need to consider the offset here anymore since we now have proper integral pointers.
llvm#88475) …return only Load since other output is chain. Added testcase that showed mismatched expected arity when Load and chain were returned as separate items after 003b58f
…lvm#86963) This patch performs several cleanups with the main purpose of normalizing the code patterns used to trigger codegen for MLIR OpenMP operations and making the processing of clauses and constructs independent. The following changes are made: - Clean up unused `directive` argument to `ClauseProcessor::processMap()`. - Move general helper functions in OpenMP.cpp to the appropriate section of the file. - Create `gen<OpName>Clauses()` functions containing the clause processing code specific for the associated OpenMP construct. - Update `gen<OpName>Op()` functions to call the corresponding `gen<OpName>Clauses()` function. - Sort calls to `ClauseProcessor::process<ClauseName>()` alphabetically, to avoid inadvertently relying on some arbitrary order. Update some tests that broke due to the order change. - Normalize `genOMP()` functions so they all delegate the generation of MLIR to `gen<OpName>Op()` functions following the same pattern. - Only process `nowait` clause on `TARGET` constructs if not compiling for the target device. A later patch can move the calls to `gen<OpName>Clauses()` out of `gen<OpName>Op()` functions and passing completed clause structures instead, in preparation to supporting composite constructs. That will make it possible to reuse clause processing for a given leaf construct when appearing alone or in a combined or composite construct, while controlling where the associated code is produced.
Check if the non-null function pointer is even valid before calling the function.
…)" This reverts commit 61717c1. Failed a MLIR test
In LoongArch psABI v2.30, the R_LARCH_ALIGN requires symbol index to support the third parameter of alignment directive. Create symbol for each section is redundant because they have section symbol which can also be used as symbol index. So use section symbol directly for R_LARCH_ALIGN.
…#88455) Currently neither the SPIR nor the SPIRV targets specify the AS for globals in their datalayout strings. This is problematic because CodeGen/LLVM will default to AS0 in this case, which produces Globals that end up in the private address space for e.g. OCL, HIPSPV or SYCL. This patch addresses it by completing the datalayout string.
…m#86312) Fixes llvm#76609 This patch does: - relax the phis constraint in `CanRedirectPredsOfEmptyBBToSucc` - guarantee the BB has multiple different predecessors to redirect, so that we can handle the case without phis in BB. Without this change and phi constraint, we may redirect the CommonPred. The motivation is consistent with JumpThreading. We always want the branch to jump more direct to the destination, without passing the middle block. In this way, we can expose more other optimization opportunities. An obivous example proposed by @dtcxzyw is like: ```llvm define i32 @test(...) { entry: br i1 %c, label %do.end, label %if.then if.then: ; preds = %entry %call2 = call i32 @dummy() %tobool3.not = icmp eq i32 %call2, 0 br i1 %tobool3.not, label %do.end, label %return do.end: ; preds = %entry, %if.then br label %return return: ; preds = %if.then, %do.end %retval.0 = phi i32 [ 0, %do.end ], [ %call2, %if.then ] ret i32 %retval.0 } ``` `entry` can directly jump to return, without passing `do.end`, and then the if-else pattern can be simplified further: ```llvm define i32 @test(...) { entry: br i1 %c, label %return, label %if.then if.then: ; preds = %entry %call2 = call i32 @dummy() br label %return return: ; preds = %if.then %retval.0 = phi i32 [ 0, %entry ], [ %call2, %if.then ] ret i32 %retval.0 } ```
aeubanks
requested review from
ayermolo,
dcci,
makslevental,
stellaraccident,
aartbik,
PeimingLiu,
yinying-lisa-li,
matthias-springer,
ftynse,
nicolasvasilache,
hanhanW,
banach-space,
dcaballe,
JDevlieghere,
tbaederr,
Endilll,
nikic and
a team
as code owners
April 17, 2024 20:16
aeubanks
removed request for
a team,
Endilll,
dcaballe,
PeimingLiu,
aartbik,
ayermolo,
tbaederr,
david-xl and
yinying-lisa-li
April 17, 2024 20:17
messed up merge (again...) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
To be removed and promoted to a proper driver flag if experiments turn out fruitful.
Original LLVM patch for this functionality: #69030